From 3cd60e31185343d4132ca7cf3c9becb903b3ec1b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 4 Jun 2014 16:47:55 +0530 Subject: KVM: PPC: BOOK3S: PR: Fix PURR and SPURR emulation We use time base for PURR and SPURR emulation with PR KVM since we are emulating a single threaded core. When using time base we need to make sure that we don't accumulate time spent in the host in PURR and SPURR value. Also we don't need to emulate mtspr because both the registers are hypervisor resource. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index f52f656..a20cc0b 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -83,8 +83,6 @@ struct kvmppc_vcpu_book3s { u64 sdr1; u64 hior; u64 msr_mask; - u64 purr_offset; - u64 spurr_offset; #ifdef CONFIG_PPC_BOOK3S_32 u32 vsid_pool[VSID_POOL_SIZE]; u32 vsid_next; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bb66d8b..4a58731 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -503,8 +503,8 @@ struct kvm_vcpu_arch { #ifdef CONFIG_BOOKE u32 decar; #endif - u32 tbl; - u32 tbu; + /* Time base value when we entered the guest */ + u64 entry_tb; u32 tcr; ulong tsr; /* we need to perform set/clr_bits() which requires ulong */ u32 ivor[64]; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 3f29526..3565e77 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -439,12 +439,6 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) (mfmsr() & MSR_HV)) vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; break; - case SPRN_PURR: - to_book3s(vcpu)->purr_offset = spr_val - get_tb(); - break; - case SPRN_SPURR: - to_book3s(vcpu)->spurr_offset = spr_val - get_tb(); - break; case SPRN_GQR0: case SPRN_GQR1: case SPRN_GQR2: @@ -572,10 +566,16 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val *spr_val = 0; break; case SPRN_PURR: - *spr_val = get_tb() + to_book3s(vcpu)->purr_offset; + /* + * On exit we would have updated purr + */ + *spr_val = vcpu->arch.purr; break; case SPRN_SPURR: - *spr_val = get_tb() + to_book3s(vcpu)->purr_offset; + /* + * On exit we would have updated spurr + */ + *spr_val = vcpu->arch.spurr; break; case SPRN_GQR0: case SPRN_GQR1: diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 8eef1e5..671f5c92 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -120,6 +120,11 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, #ifdef CONFIG_PPC_BOOK3S_64 svcpu->shadow_fscr = vcpu->arch.shadow_fscr; #endif + /* + * Now also save the current time base value. We use this + * to find the guest purr and spurr value. + */ + vcpu->arch.entry_tb = get_tb(); svcpu->in_use = true; } @@ -166,6 +171,12 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, #ifdef CONFIG_PPC_BOOK3S_64 vcpu->arch.shadow_fscr = svcpu->shadow_fscr; #endif + /* + * Update purr and spurr using time base on exit. 
+ */ + vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb; + vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb; + svcpu->in_use = false; out: -- cgit v0.10.2 From 8f42ab2749d00ea15157ab896cfbed73a247b3e1 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 5 Jun 2014 17:38:02 +0530 Subject: KVM: PPC: BOOK3S: PR: Emulate virtual timebase register virtual time base register is a per VM, per cpu register that needs to be saved and restored on vm exit and entry. Writing to VTB is not allowed in the privileged mode. Signed-off-by: Aneesh Kumar K.V [agraf: fix compile error] Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 4a58731..bd3caea 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -505,6 +505,7 @@ struct kvm_vcpu_arch { #endif /* Time base value when we entered the guest */ u64 entry_tb; + u64 entry_vtb; u32 tcr; ulong tsr; /* we need to perform set/clr_bits() which requires ulong */ u32 ivor[64]; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index bffd89d..c8f3381 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1203,6 +1203,15 @@ : "r" ((unsigned long)(v)) \ : "memory") +static inline unsigned long mfvtb (void) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return mfspr(SPRN_VTB); +#endif + return 0; +} + #ifdef __powerpc64__ #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E) #define mftb() ({unsigned long rval; \ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 1d428e60..03cbada 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -102,6 +102,15 @@ static inline u64 get_rtc(void) return (u64)hi * 1000000000 + lo; } +static inline u64 get_vtb(void) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return mfvtb(); +#endif + return 0; +} + #ifdef CONFIG_PPC64 static inline u64 get_tb(void) { diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index c254c27..ddce1ea 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -646,6 +646,9 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_BESCR: val = get_reg_val(reg->id, vcpu->arch.bescr); break; + case KVM_REG_PPC_VTB: + val = get_reg_val(reg->id, vcpu->arch.vtb); + break; default: r = -EINVAL; break; @@ -750,6 +753,9 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_BESCR: vcpu->arch.bescr = set_reg_val(reg->id, val); break; + case KVM_REG_PPC_VTB: + vcpu->arch.vtb = set_reg_val(reg->id, val); + break; default: r = -EINVAL; break; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 3565e77..1bb16a5 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -577,6 +577,9 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val */ *spr_val = vcpu->arch.spurr; break; + case SPRN_VTB: + *spr_val = vcpu->arch.vtb; + break; case SPRN_GQR0: case SPRN_GQR1: case SPRN_GQR2: diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7a12edb..315e884 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -897,9 +897,6 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_IC: *val = get_reg_val(id, vcpu->arch.ic); break; - case 
KVM_REG_PPC_VTB: - *val = get_reg_val(id, vcpu->arch.vtb); - break; case KVM_REG_PPC_CSIGR: *val = get_reg_val(id, vcpu->arch.csigr); break; @@ -1097,9 +1094,6 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_IC: vcpu->arch.ic = set_reg_val(id, *val); break; - case KVM_REG_PPC_VTB: - vcpu->arch.vtb = set_reg_val(id, *val); - break; case KVM_REG_PPC_CSIGR: vcpu->arch.csigr = set_reg_val(id, *val); break; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 671f5c92..d2deb9e 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -125,6 +125,7 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, * to find the guest purr and spurr value. */ vcpu->arch.entry_tb = get_tb(); + vcpu->arch.entry_vtb = get_vtb(); svcpu->in_use = true; } @@ -176,7 +177,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, */ vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb; vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb; - + vcpu->arch.vtb += get_vtb() - vcpu->arch.entry_vtb; svcpu->in_use = false; out: -- cgit v0.10.2 From 06da28e76b87331ebccdb6d486cfd94835b8be5e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 5 Jun 2014 17:38:05 +0530 Subject: KVM: PPC: BOOK3S: PR: Emulate instruction counter Writing to IC is not allowed in the privileged mode. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bd3caea..f9ae696 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -506,6 +506,7 @@ struct kvm_vcpu_arch { /* Time base value when we entered the guest */ u64 entry_tb; u64 entry_vtb; + u64 entry_ic; u32 tcr; ulong tsr; /* we need to perform set/clr_bits() which requires ulong */ u32 ivor[64]; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index ddce1ea..90aa5c7 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -649,6 +649,9 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_VTB: val = get_reg_val(reg->id, vcpu->arch.vtb); break; + case KVM_REG_PPC_IC: + val = get_reg_val(reg->id, vcpu->arch.ic); + break; default: r = -EINVAL; break; @@ -756,6 +759,9 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_VTB: vcpu->arch.vtb = set_reg_val(reg->id, val); break; + case KVM_REG_PPC_IC: + vcpu->arch.ic = set_reg_val(reg->id, val); + break; default: r = -EINVAL; break; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 1bb16a5..84fddcd 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -580,6 +580,9 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val case SPRN_VTB: *spr_val = vcpu->arch.vtb; break; + case SPRN_IC: + *spr_val = vcpu->arch.ic; + break; case SPRN_GQR0: case SPRN_GQR1: case SPRN_GQR2: diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 315e884..1562acf 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -894,9 +894,6 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_CIABR: *val = get_reg_val(id, vcpu->arch.ciabr); break; - case KVM_REG_PPC_IC: - *val = get_reg_val(id, vcpu->arch.ic); - break; case KVM_REG_PPC_CSIGR: *val = get_reg_val(id, vcpu->arch.csigr); break; @@ -1091,9 +1088,6 @@ static int kvmppc_set_one_reg_hv(struct 
kvm_vcpu *vcpu, u64 id, if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER) vcpu->arch.ciabr &= ~CIABR_PRIV; /* disable */ break; - case KVM_REG_PPC_IC: - vcpu->arch.ic = set_reg_val(id, *val); - break; case KVM_REG_PPC_CSIGR: vcpu->arch.csigr = set_reg_val(id, *val); break; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index d2deb9e..3da412e 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -126,6 +126,8 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, */ vcpu->arch.entry_tb = get_tb(); vcpu->arch.entry_vtb = get_vtb(); + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + vcpu->arch.entry_ic = mfspr(SPRN_IC); svcpu->in_use = true; } @@ -178,6 +180,8 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb; vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb; vcpu->arch.vtb += get_vtb() - vcpu->arch.entry_vtb; + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + vcpu->arch.ic += mfspr(SPRN_IC) - vcpu->arch.entry_ic; svcpu->in_use = false; out: -- cgit v0.10.2 From fb4188bad02f4871b26cf19b98e8d92499ca5d31 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 9 Jun 2014 01:16:32 +0200 Subject: KVM: PPC: Book3s PR: Disable AIL mode with OPAL When we're using PR KVM we must not allow the CPU to take interrupts in virtual mode, as the SLB does not contain host kernel mappings when running inside the guest context. To make sure we get good performance for non-KVM tasks but still properly functioning PR KVM, let's just disable AIL whenever a vcpu is scheduled in. This is fundamentally different from how we deal with AIL on pSeries type machines where we disable AIL for the whole machine as soon as a single KVM VM is up. The reason for that is easy - on pSeries we do not have control over per-cpu configuration of AIL. We also don't want to mess with CPU hotplug races and AIL configuration, so setting it per CPU is easier and more flexible. This patch fixes running PR KVM on POWER8 bare metal for me. Signed-off-by: Alexander Graf Acked-by: Paul Mackerras diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 3da412e..8ea7da4 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -71,6 +71,12 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) svcpu->in_use = 0; svcpu_put(svcpu); #endif + + /* Disable AIL if supported */ + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_207S)) + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL); + vcpu->cpu = smp_processor_id(); #ifdef CONFIG_PPC_BOOK3S_32 current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu; @@ -91,6 +97,12 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); + + /* Enable AIL if supported */ + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_207S)) + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3); + vcpu->cpu = -1; } -- cgit v0.10.2 From f6bf3a66227447f89f25b9db0ae39357decf2509 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 17:13:55 +0200 Subject: KVM: PPC: Book3s HV: Fix tlbie compile error Some compilers complain about uninitialized variables in the compute_tlbie_rb function. When you follow the code path you'll realize that we'll never get to that point, but the compiler isn't all that smart. 
So just default to 4k page sizes for everything, making the compiler happy and the code slightly easier to read. Signed-off-by: Alexander Graf Acked-by: Paul Mackerras diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index fddb72b..c7871f3 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -110,16 +110,12 @@ static inline int __hpte_actual_psize(unsigned int lp, int psize) static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, unsigned long pte_index) { - int b_psize, a_psize; + int b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K; unsigned int penc; unsigned long rb = 0, va_low, sllp; unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1); - if (!(v & HPTE_V_LARGE)) { - /* both base and actual psize is 4k */ - b_psize = MMU_PAGE_4K; - a_psize = MMU_PAGE_4K; - } else { + if (v & HPTE_V_LARGE) { for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) { /* valid entries have a shift value */ -- cgit v0.10.2 From 568fccc43f901889b94b228cd0238916cb40e0bd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 16 Jun 2014 16:37:38 +0200 Subject: KVM: PPC: Book3S PR: Handle hyp doorbell exits If we're running PR KVM in HV mode, we may get hypervisor doorbell interrupts. Handle those the same way we treat normal doorbells. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 8ea7da4..3b82e86 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -988,6 +988,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, case BOOK3S_INTERRUPT_DECREMENTER: case BOOK3S_INTERRUPT_HV_DECREMENTER: case BOOK3S_INTERRUPT_DOORBELL: + case BOOK3S_INTERRUPT_H_DOORBELL: vcpu->stat.dec_exits++; r = RESUME_GUEST; break; -- cgit v0.10.2 From 05a308c722822b0fbcc706b54be70f9bb9d52539 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 12 Jun 2014 18:16:10 +1000 Subject: KVM: PPC: Book3S HV: Fix ABIv2 indirect branch issue To establish addressability quickly, ABIv2 requires the target address of the function being called to be in r12. Signed-off-by: Anton Blanchard Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 868347e..da1cac5 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1913,8 +1913,8 @@ hcall_try_real_mode: lwax r3,r3,r4 cmpwi r3,0 beq guest_exit_cont - add r3,r3,r4 - mtctr r3 + add r12,r3,r4 + mtctr r12 mr r3,r9 /* get vcpu pointer */ ld r4,VCPU_GPR(R4)(r9) bctrl -- cgit v0.10.2 From ad7d4584a225e3c1ac634dc803421fe842491ecf Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 12 Jun 2014 18:16:53 +1000 Subject: KVM: PPC: Assembly functions exported to modules need _GLOBAL_TOC() Both kvmppc_hv_entry_trampoline and kvmppc_entry_trampoline are assembly functions that are exported to modules and also require a valid r2. As such we need to use _GLOBAL_TOC so we provide a global entry point that establishes the TOC (r2). 
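For context, on an ABIv2 kernel _GLOBAL_TOC emits a dual entry point: the global entry derives r2 from the function address in r12 and then falls through to the local entry. A rough sketch of the expansion (paraphrased from the ppc_asm.h definition, not a verbatim copy):

0:	addis	r2,r12,(.TOC.-0b)@ha	/* global entry: rebuild the TOC */
	addi	r2,r2,(.TOC.-0b)@l	/* pointer from the address in r12 */
	.localentry name,.-name		/* local entry starts here */

Same-module callers branch to the local entry and skip the TOC setup; cross-module callers (such as the kvm module calling these trampolines) enter at the top, which is also why the previous patch had to put the target address in r12.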
Signed-off-by: Anton Blanchard Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index da1cac5..64ac56f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -48,7 +48,7 @@ * * LR = return address to continue at after eventually re-enabling MMU */ -_GLOBAL(kvmppc_hv_entry_trampoline) +_GLOBAL_TOC(kvmppc_hv_entry_trampoline) mflr r0 std r0, PPC_LR_STKOFF(r1) stdu r1, -112(r1) diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 9eec675..4850a22 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -146,7 +146,7 @@ kvmppc_handler_skip_ins: * On entry, r4 contains the guest shadow MSR * MSR.EE has to be 0 when calling this function */ -_GLOBAL(kvmppc_entry_trampoline) +_GLOBAL_TOC(kvmppc_entry_trampoline) mfmsr r5 LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter) toreal(r7) -- cgit v0.10.2 From da166facd432d0edde509f941304915f9bed704d Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 16 Jun 2014 14:37:53 +0200 Subject: KVM: PPC: Book3S PR: Fix ABIv2 on LE We switched to ABIv2 on Little Endian systems now which gets rid of the dotted function names. Branch to the actual functions when we see such a system. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index e2c29e3..d044b8b 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -25,7 +25,11 @@ #include #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) name +#else #define FUNC(name) GLUE(.,name) +#endif #define GET_SHADOW_VCPU(reg) addi reg, r13, PACA_SVCPU #elif defined(CONFIG_PPC_BOOK3S_32) diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 4850a22..16c4d88 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -36,7 +36,11 @@ #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) name +#else #define FUNC(name) GLUE(.,name) +#endif #elif defined(CONFIG_PPC_BOOK3S_32) -- cgit v0.10.2 From f396df35188c59a5ecb83932190505ef297754e6 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 16 Jun 2014 13:58:11 +0200 Subject: KVM: PPC: Book3S PR: Fix sparse endian checks While sending sparse with endian checks over the code base, it triggered at some places that were missing casts or had wrong types. Fix them up. 
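The pattern being enforced is worth spelling out; as a minimal sketch (variable names hypothetical), HPT entries stay big-endian in memory and are converted exactly once at the access boundary:

	__be64 pteg[2];				/* raw HPT entry, big-endian in memory */
	u64 v = be64_to_cpu(pteg[0]);		/* convert once for native computation */
	pteg[0] = cpu_to_be64(v | HPTE_V_VALID);	/* convert back on store */

The (__force __be64) casts in the hunks below cover the cases where an existing u64 array really does hold big-endian data; __force tells sparse that the reinterpretation is intentional rather than a missed conversion.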
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index 52a63bf..f7c25c6 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -40,8 +40,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu) { long flags = kvmppc_get_gpr(vcpu, 4); long pte_index = kvmppc_get_gpr(vcpu, 5); - unsigned long pteg[2 * 8]; - unsigned long pteg_addr, i, *hpte; + __be64 pteg[2 * 8]; + __be64 *hpte; + unsigned long pteg_addr, i; long int ret; i = pte_index & 7; @@ -93,8 +94,8 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, pte_index); mutex_lock(&vcpu->kvm->arch.hpt_mutex); copy_from_user(pte, (void __user *)pteg, sizeof(pte)); - pte[0] = be64_to_cpu(pte[0]); - pte[1] = be64_to_cpu(pte[1]); + pte[0] = be64_to_cpu((__force __be64)pte[0]); + pte[1] = be64_to_cpu((__force __be64)pte[1]); ret = H_NOT_FOUND; if ((pte[0] & HPTE_V_VALID) == 0 || @@ -171,8 +172,8 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX); copy_from_user(pte, (void __user *)pteg, sizeof(pte)); - pte[0] = be64_to_cpu(pte[0]); - pte[1] = be64_to_cpu(pte[1]); + pte[0] = be64_to_cpu((__force __be64)pte[0]); + pte[1] = be64_to_cpu((__force __be64)pte[1]); /* tsl = AVPN */ flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26; @@ -211,8 +212,8 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, pte_index); mutex_lock(&vcpu->kvm->arch.hpt_mutex); copy_from_user(pte, (void __user *)pteg, sizeof(pte)); - pte[0] = be64_to_cpu(pte[0]); - pte[1] = be64_to_cpu(pte[1]); + pte[0] = be64_to_cpu((__force __be64)pte[0]); + pte[1] = be64_to_cpu((__force __be64)pte[1]); ret = H_NOT_FOUND; if ((pte[0] & HPTE_V_VALID) == 0 || @@ -231,8 +232,8 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) rb = compute_tlbie_rb(v, r, pte_index); vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); - pte[0] = cpu_to_be64(pte[0]); - pte[1] = cpu_to_be64(pte[1]); + pte[0] = (__force u64)cpu_to_be64(pte[0]); + pte[1] = (__force u64)cpu_to_be64(pte[1]); copy_to_user((void __user *)pteg, pte, sizeof(pte)); ret = H_SUCCESS; -- cgit v0.10.2 From 1f0eeb7e1a88f46afa0f435cf7c34b0c84cf2394 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 18 Jun 2014 10:15:22 +0300 Subject: KVM: PPC: e500mc: Enhance tlb invalidation condition on vcpu schedule On vcpu schedule, the condition checked for tlb pollution is too loose. The tlb entries of a vcpu become polluted (vs stale) only when a different vcpu within the same logical partition runs in-between. Optimize the tlb invalidation condition keeping last_vcpu per logical partition id. With the new invalidation condition, a guest shows 4% performance improvement on P5020DS while running a memory stress application with the cpu oversubscribed, the other guest running a cpu intensive workload. 
Guest - old invalidation condition real 3.89 user 3.87 sys 0.01 Guest - enhanced invalidation condition real 3.75 user 3.73 sys 0.01 Host real 3.70 user 1.85 sys 0.00 The memory stress application accesses 4KB pages backed by 75% of available TLB0 entries: char foo[ENTRIES][4096] __attribute__ ((aligned (4096))); int main() { char bar; int i, j; for (i = 0; i < ITERATIONS; i++) for (j = 0; j < ENTRIES; j++) bar = foo[j][0]; return 0; } Signed-off-by: Mihai Caraman Reviewed-by: Scott Wood Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 17e4562..690499d 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -110,7 +110,7 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) { } -static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu); +static DEFINE_PER_CPU(struct kvm_vcpu *[KVMPPC_NR_LPIDS], last_vcpu_of_lpid); static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) { @@ -141,9 +141,9 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_GESR, vcpu->arch.shared->esr); if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || - __get_cpu_var(last_vcpu_on_cpu) != vcpu) { + __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] != vcpu) { kvmppc_e500_tlbil_all(vcpu_e500); - __get_cpu_var(last_vcpu_on_cpu) = vcpu; + __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu; } kvmppc_load_guest_fp(vcpu); -- cgit v0.10.2 From 699a0ea0823d32030b0666b28ff8633960f7ffa7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 2 Jun 2014 11:02:59 +1000 Subject: KVM: PPC: Book3S: Controls for in-kernel sPAPR hypercall handling This provides a way for userspace controls which sPAPR hcalls get handled in the kernel. Each hcall can be individually enabled or disabled for in-kernel handling, except for H_RTAS. The exception for H_RTAS is because userspace can already control whether individual RTAS functions are handled in-kernel or not via the KVM_PPC_RTAS_DEFINE_TOKEN ioctl, and because the numeric value for H_RTAS is out of the normal sequence of hcall numbers. Hcalls are enabled or disabled using the KVM_ENABLE_CAP ioctl for the KVM_CAP_PPC_ENABLE_HCALL capability on the file descriptor for the VM. The args field of the struct kvm_enable_cap specifies the hcall number in args[0] and the enable/disable flag in args[1]; 0 means disable in-kernel handling (so that the hcall will always cause an exit to userspace) and 1 means enable. Enabling or disabling in-kernel handling of an hcall is effective across the whole VM. The ability for KVM_ENABLE_CAP to be used on a VM file descriptor on PowerPC is new, added by this commit. The KVM_CAP_ENABLE_CAP_VM capability advertises that this ability exists. When a VM is created, an initial set of hcalls are enabled for in-kernel handling. The set that is enabled is the set that have an in-kernel implementation at this point. Any new hcall implementations from this point onwards should not be added to the default set without a good reason. No distinction is made between real-mode and virtual-mode hcall implementations; the one setting controls them both. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 0fe3649..5c54d19 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2863,8 +2863,8 @@ The fields in each entry are defined as follows: this function/index combination -6. 
Capabilities that can be enabled ------------------------------------ +6. Capabilities that can be enabled on vCPUs +-------------------------------------------- There are certain capabilities that change the behavior of the virtual CPU when enabled. To enable them, please see section 4.37. Below you can find a list of @@ -3002,3 +3002,40 @@ Parameters: args[0] is the XICS device fd args[1] is the XICS CPU number (server ID) for this vcpu This capability connects the vcpu to an in-kernel XICS device. + + +7. Capabilities that can be enabled on VMs +------------------------------------------ + +There are certain capabilities that change the behavior of the virtual +machine when enabled. To enable them, please see section 4.37. Below +you can find a list of capabilities and what their effect on the VM +is when enabling them. + +The following information is provided along with the description: + + Architectures: which instruction set architectures provide this ioctl. + x86 includes both i386 and x86_64. + + Parameters: what parameters are accepted by the capability. + + Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL) + are not detailed, but errors with specific meanings are. + + +7.1 KVM_CAP_PPC_ENABLE_HCALL + +Architectures: ppc +Parameters: args[0] is the sPAPR hcall number + args[1] is 0 to disable, 1 to enable in-kernel handling + +This capability controls whether individual sPAPR hypercalls (hcalls) +get handled by the kernel or not. Enabling or disabling in-kernel +handling of an hcall is effective across the VM. On creation, an +initial set of hcalls are enabled for in-kernel handling, which +consists of those hcalls for which in-kernel handlers were implemented +before this capability was implemented. If disabled, the kernel will +not to attempt to handle the hcall, but will always exit to userspace +to handle it. Note that it may not make sense to enable some and +disable others of a group of related hcalls, but KVM does not prevent +userspace from doing that. 
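From userspace the control path looks like the following sketch (not part of the patch; vm_fd is assumed to be an open VM file descriptor, and the H_CEDE value comes from PAPR since the sPAPR hcall numbers are not in the uapi headers):

#include <sys/ioctl.h>
#include <linux/kvm.h>

#define H_CEDE	0xE0	/* sPAPR hcall number, from PAPR / asm/hvcall.h */

/* Make H_CEDE exit to userspace instead of being handled in-kernel. */
static int disable_h_cede(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_PPC_ENABLE_HCALL,
		.args = { H_CEDE, 0 },	/* args[1] = 0: disable in-kernel handling */
	};

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}

Note the ioctl is issued on the VM file descriptor rather than a vcpu descriptor, matching the VM-wide scope described above.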
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index a20cc0b..052ab2a 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -187,6 +187,7 @@ extern void kvmppc_hv_entry_trampoline(void); extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst); extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst); extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd); +extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm); extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, struct kvm_vcpu *vcpu); extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index f9ae696..62b2cee 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -34,6 +34,7 @@ #include #include #include +#include #define KVM_MAX_VCPUS NR_CPUS #define KVM_MAX_VCORES NR_CPUS @@ -263,6 +264,7 @@ struct kvm_arch { #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; struct list_head rtas_tokens; + DECLARE_BITMAP(enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); #endif #ifdef CONFIG_KVM_MPIC struct openpic *mpic; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f5995a9..17ffcb4 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -493,6 +493,7 @@ int main(void) DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock)); DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); + DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls)); DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor)); DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1562acf..cf445d2 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -67,6 +67,8 @@ /* Used as a "null" value for timebase values */ #define TB_NIL (~(u64)0) +static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); + static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -562,6 +564,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) struct kvm_vcpu *tvcpu; int idx, rc; + if (req <= MAX_HCALL_OPCODE && + !test_bit(req/4, vcpu->kvm->arch.enabled_hcalls)) + return RESUME_HOST; + switch (req) { case H_ENTER: idx = srcu_read_lock(&vcpu->kvm->srcu); @@ -2269,6 +2275,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) */ cpumask_setall(&kvm->arch.need_tlb_flush); + /* Start out with the default set of hcalls enabled */ + memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls, + sizeof(kvm->arch.enabled_hcalls)); + kvm->arch.rma = NULL; kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); @@ -2407,6 +2417,45 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, return r; } +/* + * List of hcall numbers to enable by default. + * For compatibility with old userspace, we enable by default + * all hcalls that were implemented before the hcall-enabling + * facility was added. Note this list should not include H_RTAS. 
+ */ +static unsigned int default_hcall_list[] = { + H_REMOVE, + H_ENTER, + H_READ, + H_PROTECT, + H_BULK_REMOVE, + H_GET_TCE, + H_PUT_TCE, + H_SET_DABR, + H_SET_XDABR, + H_CEDE, + H_PROD, + H_CONFER, + H_REGISTER_VPA, +#ifdef CONFIG_KVM_XICS + H_EOI, + H_CPPR, + H_IPI, + H_IPOLL, + H_XIRR, + H_XIRR_X, +#endif + 0 +}; + +static void init_default_hcalls(void) +{ + int i; + + for (i = 0; default_hcall_list[i]; ++i) + __set_bit(default_hcall_list[i] / 4, default_enabled_hcalls); +} + static struct kvmppc_ops kvm_ops_hv = { .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, @@ -2454,6 +2503,8 @@ static int kvmppc_book3s_init_hv(void) kvm_ops_hv.owner = THIS_MODULE; kvmppc_hv_ops = &kvm_ops_hv; + init_default_hcalls(); + r = kvmppc_mmu_hv_init(); return r; } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 64ac56f..33aaade 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1909,6 +1909,17 @@ hcall_try_real_mode: clrrdi r3,r3,2 cmpldi r3,hcall_real_table_end - hcall_real_table bge guest_exit_cont + /* See if this hcall is enabled for in-kernel handling */ + ld r4, VCPU_KVM(r9) + srdi r0, r3, 8 /* r0 = (r3 / 4) >> 6 */ + sldi r0, r0, 3 /* index into kvm->arch.enabled_hcalls[] */ + add r4, r4, r0 + ld r0, KVM_ENABLED_HCALLS(r4) + rlwinm r4, r3, 32-2, 0x3f /* r4 = (r3 / 4) & 0x3f */ + srd r0, r0, r4 + andi. r0, r0, 1 + beq guest_exit_cont + /* Get pointer to handler, if any, and call it */ LOAD_REG_ADDR(r4, hcall_real_table) lwax r3,r3,r4 cmpwi r3,0 diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 3b82e86..123ac7d 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1597,6 +1597,11 @@ static int kvmppc_core_init_vm_pr(struct kvm *kvm) { mutex_init(&kvm->arch.hpt_mutex); +#ifdef CONFIG_PPC_BOOK3S_64 + /* Start out with the default set of hcalls enabled */ + kvmppc_pr_init_default_hcalls(kvm); +#endif + if (firmware_has_feature(FW_FEATURE_SET_MODE)) { spin_lock(&kvm_global_user_count_lock); if (++kvm_global_user_count == 1) diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index f7c25c6..eacaa6e 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -267,6 +267,10 @@ static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) { + if (cmd <= MAX_HCALL_OPCODE && + !test_bit(cmd/4, vcpu->kvm->arch.enabled_hcalls)) + return EMULATE_FAIL; + switch (cmd) { case H_ENTER: return kvmppc_h_pr_enter(vcpu); @@ -304,3 +308,36 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) return EMULATE_FAIL; } + + +/* + * List of hcall numbers to enable by default. + * For compatibility with old userspace, we enable by default + * all hcalls that were implemented before the hcall-enabling + * facility was added. Note this list should not include H_RTAS. 
+ */ +static unsigned int default_hcall_list[] = { + H_ENTER, + H_REMOVE, + H_PROTECT, + H_BULK_REMOVE, + H_PUT_TCE, + H_CEDE, +#ifdef CONFIG_KVM_XICS + H_XIRR, + H_CPPR, + H_EOI, + H_IPI, + H_IPOLL, + H_XIRR_X, +#endif + 0 +}; + +void kvmppc_pr_init_default_hcalls(struct kvm *kvm) +{ + int i; + + for (i = 0; default_hcall_list[i]; ++i) + __set_bit(default_hcall_list[i] / 4, kvm->arch.enabled_hcalls); +} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 61c738a..3222a4d 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -387,6 +387,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_UNSET_IRQ: case KVM_CAP_PPC_IRQ_LEVEL: case KVM_CAP_ENABLE_CAP: + case KVM_CAP_ENABLE_CAP_VM: case KVM_CAP_ONE_REG: case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: @@ -417,6 +418,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_ALLOC_HTAB: case KVM_CAP_PPC_RTAS: case KVM_CAP_PPC_FIXUP_HCALL: + case KVM_CAP_PPC_ENABLE_HCALL: #ifdef CONFIG_KVM_XICS case KVM_CAP_IRQ_XICS: #endif @@ -1099,6 +1101,40 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, return 0; } + +static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, + struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER + case KVM_CAP_PPC_ENABLE_HCALL: { + unsigned long hcall = cap->args[0]; + + r = -EINVAL; + if (hcall > MAX_HCALL_OPCODE || (hcall & 3) || + cap->args[1] > 1) + break; + if (cap->args[1]) + set_bit(hcall / 4, kvm->arch.enabled_hcalls); + else + clear_bit(hcall / 4, kvm->arch.enabled_hcalls); + r = 0; + break; + } +#endif + default: + r = -EINVAL; + break; + } + + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1118,6 +1154,15 @@ long kvm_arch_vm_ioctl(struct file *filp, break; } + case KVM_ENABLE_CAP: + { + struct kvm_enable_cap cap; + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + goto out; + r = kvm_vm_ioctl_enable_cap(kvm, &cap); + break; + } #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CREATE_SPAPR_TCE: { struct kvm_create_spapr_tce create_tce; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e11d8f1..0418b74 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -758,6 +758,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_VM_ATTRIBUTES 101 #define KVM_CAP_ARM_PSCI_0_2 102 #define KVM_CAP_PPC_FIXUP_HCALL 103 +#define KVM_CAP_PPC_ENABLE_HCALL 104 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v0.10.2 From ae2113a4f1a6cd5a3cd3d75f394547922758e9ac Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 2 Jun 2014 11:03:00 +1000 Subject: KVM: PPC: Book3S: Allow only implemented hcalls to be enabled or disabled This adds code to check that when the KVM_CAP_PPC_ENABLE_HCALL capability is used to enable or disable in-kernel handling of an hcall, that the hcall is actually implemented by the kernel. If not an EINVAL error is returned. This also checks the default-enabled list of hcalls and prints a warning if any hcall there is not actually implemented. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 5c54d19..6955318 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3039,3 +3039,7 @@ not to attempt to handle the hcall, but will always exit to userspace to handle it. 
Note that it may not make sense to enable some and disable others of a group of related hcalls, but KVM does not prevent userspace from doing that. + +If the hcall number specified is not one that has an in-kernel +implementation, the KVM_ENABLE_CAP ioctl will fail with an EINVAL +error. diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 052ab2a..ceb70aa 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -146,6 +146,7 @@ extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache * extern int kvmppc_mmu_hpte_sysinit(void); extern void kvmppc_mmu_hpte_sysexit(void); extern int kvmppc_mmu_hv_init(void); +extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc); extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); @@ -188,6 +189,8 @@ extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst); extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst); extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd); extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm); +extern int kvmppc_hcall_impl_pr(unsigned long cmd); +extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd); extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, struct kvm_vcpu *vcpu); extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 9c89cdd..e2fd5a1 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -228,7 +228,7 @@ struct kvmppc_ops { void (*fast_vcpu_kick)(struct kvm_vcpu *vcpu); long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl, unsigned long arg); - + int (*hcall_implemented)(unsigned long hcall); }; extern struct kvmppc_ops *kvmppc_hv_ops; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 90aa5c7..bd75902 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -925,6 +925,11 @@ int kvmppc_core_check_processor_compat(void) return 0; } +int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall) +{ + return kvm->arch.kvm_ops->hcall_implemented(hcall); +} + static int kvmppc_book3s_init(void) { int r; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index cf445d2..c4377c7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -645,6 +645,28 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +static int kvmppc_hcall_impl_hv(unsigned long cmd) +{ + switch (cmd) { + case H_CEDE: + case H_PROD: + case H_CONFER: + case H_REGISTER_VPA: +#ifdef CONFIG_KVM_XICS + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + case H_IPOLL: + case H_XIRR_X: +#endif + return 1; + } + + /* See if it's in the real-mode table */ + return kvmppc_hcall_impl_hv_realmode(cmd); +} + static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, struct task_struct *tsk) { @@ -2451,9 +2473,13 @@ static unsigned int default_hcall_list[] = { static void init_default_hcalls(void) { int i; + unsigned int hcall; - for (i = 0; default_hcall_list[i]; ++i) - __set_bit(default_hcall_list[i] / 4, default_enabled_hcalls); + for (i = 0; default_hcall_list[i]; ++i) { + hcall = default_hcall_list[i]; + WARN_ON(!kvmppc_hcall_impl_hv(hcall)); + 
__set_bit(hcall / 4, default_enabled_hcalls); + } } static struct kvmppc_ops kvm_ops_hv = { @@ -2488,6 +2514,7 @@ static struct kvmppc_ops kvm_ops_hv = { .emulate_mfspr = kvmppc_core_emulate_mfspr_hv, .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv, .arch_vm_ioctl = kvm_arch_vm_ioctl_hv, + .hcall_implemented = kvmppc_hcall_impl_hv, }; static int kvmppc_book3s_init_hv(void) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7cde8a6..3b41447 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -212,3 +212,16 @@ bool kvm_hv_mode_active(void) { return atomic_read(&hv_vm_count) != 0; } + +extern int hcall_real_table[], hcall_real_table_end[]; + +int kvmppc_hcall_impl_hv_realmode(unsigned long cmd) +{ + cmd /= 4; + if (cmd < hcall_real_table_end - hcall_real_table && + hcall_real_table[cmd]) + return 1; + + return 0; +} +EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 33aaade..e66c1e38 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2042,6 +2042,7 @@ hcall_real_table: .long 0 /* 0x12c */ .long 0 /* 0x130 */ .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table + .globl hcall_real_table_end hcall_real_table_end: ignore_hdec: diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 123ac7d..15fd6c2 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1670,6 +1670,9 @@ static struct kvmppc_ops kvm_ops_pr = { .emulate_mfspr = kvmppc_core_emulate_mfspr_pr, .fast_vcpu_kick = kvm_vcpu_kick, .arch_vm_ioctl = kvm_arch_vm_ioctl_pr, +#ifdef CONFIG_PPC_BOOK3S_64 + .hcall_implemented = kvmppc_hcall_impl_pr, +#endif }; diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index eacaa6e..6d0143f 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -309,6 +309,27 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) return EMULATE_FAIL; } +int kvmppc_hcall_impl_pr(unsigned long cmd) +{ + switch (cmd) { + case H_ENTER: + case H_REMOVE: + case H_PROTECT: + case H_BULK_REMOVE: + case H_PUT_TCE: + case H_CEDE: +#ifdef CONFIG_KVM_XICS + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + case H_IPOLL: + case H_XIRR_X: +#endif + return 1; + } + return 0; +} /* * List of hcall numbers to enable by default. 
@@ -337,7 +358,11 @@ static unsigned int default_hcall_list[] = { void kvmppc_pr_init_default_hcalls(struct kvm *kvm) { int i; + unsigned int hcall; - for (i = 0; default_hcall_list[i]; ++i) - __set_bit(default_hcall_list[i] / 4, kvm->arch.enabled_hcalls); + for (i = 0; default_hcall_list[i]; ++i) { + hcall = default_hcall_list[i]; + WARN_ON(!kvmppc_hcall_impl_pr(hcall)); + __set_bit(hcall / 4, kvm->arch.enabled_hcalls); + } } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3222a4d..7efc2b7 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1119,6 +1119,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, if (hcall > MAX_HCALL_OPCODE || (hcall & 3) || cap->args[1] > 1) break; + if (!kvmppc_book3s_hcall_implemented(kvm, hcall)) + break; if (cap->args[1]) set_bit(hcall / 4, kvm->arch.enabled_hcalls); else -- cgit v0.10.2 From 9642382e826066c2d30d1b23d1b45410cdd8e07d Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 2 Jun 2014 11:03:01 +1000 Subject: KVM: PPC: Book3S HV: Add H_SET_MODE hcall handling This adds support for the H_SET_MODE hcall. This hcall is a multiplexer that has several functions, some of which are called rarely, and some which are potentially called very frequently. Here we add support for the functions that set the debug registers CIABR (Completed Instruction Address Breakpoint Register) and DAWR/DAWRX (Data Address Watchpoint Register and eXtension), since they could be updated by the guest as often as every context switch. This also adds a kvmppc_power8_compatible() function to test to see if a guest is compatible with POWER8 or not. The CIABR and DAWR/X only exist on POWER8. Signed-off-by: Michael Neuling Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 5dbbb29..85bc8c0 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -279,6 +279,12 @@ #define H_GET_24X7_DATA 0xF07C #define H_GET_PERF_COUNTER_INFO 0xF080 +/* Values for 2nd argument to H_SET_MODE */ +#define H_SET_MODE_RESOURCE_SET_CIABR 1 +#define H_SET_MODE_RESOURCE_SET_DAWR 2 +#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 +#define H_SET_MODE_RESOURCE_LE 4 + #ifndef __ASSEMBLY__ /** diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c4377c7..7db9df2 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -557,6 +557,48 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, vcpu->arch.dtl.dirty = true; } +static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207) + return true; + if ((!vcpu->arch.vcore->arch_compat) && + cpu_has_feature(CPU_FTR_ARCH_207S)) + return true; + return false; +} + +static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, + unsigned long resource, unsigned long value1, + unsigned long value2) +{ + switch (resource) { + case H_SET_MODE_RESOURCE_SET_CIABR: + if (!kvmppc_power8_compatible(vcpu)) + return H_P2; + if (value2) + return H_P4; + if (mflags) + return H_UNSUPPORTED_FLAG_START; + /* Guests can't breakpoint the hypervisor */ + if ((value1 & CIABR_PRIV) == CIABR_PRIV_HYPER) + return H_P3; + vcpu->arch.ciabr = value1; + return H_SUCCESS; + case H_SET_MODE_RESOURCE_SET_DAWR: + if (!kvmppc_power8_compatible(vcpu)) + return H_P2; + if (mflags) + return H_UNSUPPORTED_FLAG_START; + if (value2 & DABRX_HYP) + return H_P4; + vcpu->arch.dawr = value1; + 
vcpu->arch.dawrx = value2; + return H_SUCCESS; + default: + return H_TOO_HARD; + } +} + int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) { unsigned long req = kvmppc_get_gpr(vcpu, 3); @@ -626,7 +668,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) /* Send the error out to userspace via KVM_RUN */ return rc; - + case H_SET_MODE: + ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5), + kvmppc_get_gpr(vcpu, 6), + kvmppc_get_gpr(vcpu, 7)); + if (ret == H_TOO_HARD) + return RESUME_HOST; + break; case H_XIRR: case H_CPPR: case H_EOI: @@ -652,6 +701,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd) case H_PROD: case H_CONFER: case H_REGISTER_VPA: + case H_SET_MODE: #ifdef CONFIG_KVM_XICS case H_XIRR: case H_CPPR: -- cgit v0.10.2 From d57cef91a0c30d3439a4d235eb94ab9efbf797a0 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Mon, 30 Jun 2014 15:54:58 +0300 Subject: KVM: PPC: e500: Fix default tlb for victim hint Tlb search operation used for victim hint relies on the default tlb set by the host. When hardware tablewalk support is enabled in the host, the default tlb is TLB1 which leads KVM to evict the bolted entry. Set and restore the default tlb when searching for victim hint. Signed-off-by: Mihai Caraman Reviewed-by: Scott Wood Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index d0918e0..8d24f78 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -40,7 +40,9 @@ /* MAS registers bit definitions */ -#define MAS0_TLBSEL(x) (((x) << 28) & 0x30000000) +#define MAS0_TLBSEL_MASK 0x30000000 +#define MAS0_TLBSEL_SHIFT 28 +#define MAS0_TLBSEL(x) (((x) << MAS0_TLBSEL_SHIFT) & MAS0_TLBSEL_MASK) #define MAS0_ESEL_MASK 0x0FFF0000 #define MAS0_ESEL_SHIFT 16 #define MAS0_ESEL(x) (((x) << MAS0_ESEL_SHIFT) & MAS0_ESEL_MASK) @@ -86,6 +88,7 @@ #define MAS3_SPSIZE 0x0000003e #define MAS3_SPSIZE_SHIFT 1 +#define MAS4_TLBSEL_MASK MAS0_TLBSEL_MASK #define MAS4_TLBSELD(x) MAS0_TLBSEL(x) #define MAS4_INDD 0x00008000 /* Default IND */ #define MAS4_TSIZED(x) MAS1_TSIZE(x) diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index dd2cc03..79677d7 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -107,11 +107,15 @@ static u32 get_host_mas0(unsigned long eaddr) { unsigned long flags; u32 mas0; + u32 mas4; local_irq_save(flags); mtspr(SPRN_MAS6, 0); + mas4 = mfspr(SPRN_MAS4); + mtspr(SPRN_MAS4, mas4 & ~MAS4_TLBSEL_MASK); asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET)); mas0 = mfspr(SPRN_MAS0); + mtspr(SPRN_MAS4, mas4); local_irq_restore(flags); return mas0; -- cgit v0.10.2 From 8f6822c4b9fac6e47414d2f1e11dbabda9bc2163 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 10:07:40 +0200 Subject: PPC: Add asm helpers for BE 32bit load/store From assembly code we might not only have to explicitly BE access 64bit values, but sometimes also 32bit ones. Add helpers that allow for easy use of lwzx/stwx in their respective byte-reverse or native form. 
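Used in a .S file the new helpers read as follows (registers arbitrary, shown only to illustrate the indexed-form operands; a sketch, not taken from the patch):

	LWZX_BE	r5, r4, r6	/* load the BE word at r4+r6 into r5, native order */
	STWX_BE	r5, r4, r6	/* store r5 back as a big-endian word */

On a big-endian host these assemble to plain lwzx/stwx; on a little-endian host they become the byte-reversing lwbrx/stwbrx, so the in-memory format stays big-endian either way.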
Signed-off-by: Alexander Graf CC: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index 4b237aa..21be8ae 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -34,10 +34,14 @@ #define PPC_MIN_STKFRM 112 #ifdef __BIG_ENDIAN__ +#define LWZX_BE stringify_in_c(lwzx) #define LDX_BE stringify_in_c(ldx) +#define STWX_BE stringify_in_c(stwx) #define STDX_BE stringify_in_c(stdx) #else +#define LWZX_BE stringify_in_c(lwbrx) #define LDX_BE stringify_in_c(ldbrx) +#define STWX_BE stringify_in_c(stwbrx) #define STDX_BE stringify_in_c(stdbrx) #endif -- cgit v0.10.2 From 6f22bd3265fb542acb2697026b953ec07298242d Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 10:16:06 +0200 Subject: KVM: PPC: Book3S HV: Make HTAB code LE host aware When running on an LE host all data structures are kept in little endian byte order. However, the HTAB still needs to be maintained in big endian. So every time we access any HTAB we need to make sure we do so in the right byte order. Fix up all accesses to manually byte swap. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index ceb70aa..8ac5392 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -162,9 +162,9 @@ extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, bool *writable); extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, unsigned long *rmap, long pte_index, int realmode); -extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, +extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index); -void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, +void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index); extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, unsigned long *nb_ret); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index c7871f3..e504f88 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -59,20 +59,29 @@ extern unsigned long kvm_rma_pages; /* These bits are reserved in the guest view of the HPTE */ #define HPTE_GR_RESERVED HPTE_GR_MODIFIED -static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits) +static inline long try_lock_hpte(__be64 *hpte, unsigned long bits) { unsigned long tmp, old; + __be64 be_lockbit, be_bits; + + /* + * We load/store in native endian, but the HTAB is in big endian. If + * we byte swap all data we apply on the PTE we're implicitly correct + * again. + */ + be_lockbit = cpu_to_be64(HPTE_V_HVLOCK); + be_bits = cpu_to_be64(bits); asm volatile(" ldarx %0,0,%2\n" " and. %1,%0,%3\n" " bne 2f\n" - " ori %0,%0,%4\n" + " or %0,%0,%4\n" " stdcx. 
%0,0,%2\n" " beq+ 2f\n" " mr %1,%3\n" "2: isync" : "=&r" (tmp), "=&r" (old) - : "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK) + : "r" (hpte), "r" (be_bits), "r" (be_lockbit) : "cc", "memory"); return old == 0; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 8056107..2d154d9 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -450,7 +450,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned long slb_v; unsigned long pp, key; unsigned long v, gr; - unsigned long *hptep; + __be64 *hptep; int index; int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); @@ -473,13 +473,13 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, preempt_enable(); return -ENOENT; } - hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); - v = hptep[0] & ~HPTE_V_HVLOCK; + hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); + v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; gr = kvm->arch.revmap[index].guest_rpte; /* Unlock the HPTE */ asm volatile("lwsync" : : : "memory"); - hptep[0] = v; + hptep[0] = cpu_to_be64(v); preempt_enable(); gpte->eaddr = eaddr; @@ -583,7 +583,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr) { struct kvm *kvm = vcpu->kvm; - unsigned long *hptep, hpte[3], r; + unsigned long hpte[3], r; + __be64 *hptep; unsigned long mmu_seq, psize, pte_size; unsigned long gpa_base, gfn_base; unsigned long gpa, gfn, hva, pfn; @@ -606,16 +607,16 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, if (ea != vcpu->arch.pgfault_addr) return RESUME_GUEST; index = vcpu->arch.pgfault_index; - hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); + hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); rev = &kvm->arch.revmap[index]; preempt_disable(); while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) cpu_relax(); - hpte[0] = hptep[0] & ~HPTE_V_HVLOCK; - hpte[1] = hptep[1]; + hpte[0] = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; + hpte[1] = be64_to_cpu(hptep[1]); hpte[2] = r = rev->guest_rpte; asm volatile("lwsync" : : : "memory"); - hptep[0] = hpte[0]; + hptep[0] = cpu_to_be64(hpte[0]); preempt_enable(); if (hpte[0] != vcpu->arch.pgfault_hpte[0] || @@ -731,8 +732,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, preempt_disable(); while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) cpu_relax(); - if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] || - rev->guest_rpte != hpte[2]) + if ((be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK) != hpte[0] || + be64_to_cpu(hptep[1]) != hpte[1] || + rev->guest_rpte != hpte[2]) /* HPTE has been changed under us; let the guest retry */ goto out_unlock; hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; @@ -752,20 +754,20 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; r &= rcbits | ~(HPTE_R_R | HPTE_R_C); - if (hptep[0] & HPTE_V_VALID) { + if (be64_to_cpu(hptep[0]) & HPTE_V_VALID) { /* HPTE was previously valid, so we need to invalidate it */ unlock_rmap(rmap); - hptep[0] |= HPTE_V_ABSENT; + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, index); /* don't lose previous R and C bits */ - r |= hptep[1] & (HPTE_R_R | HPTE_R_C); + r |= be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); } else { kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); } - hptep[1] = r; + hptep[1] = cpu_to_be64(r); eieio(); - 
hptep[0] = hpte[0]; + hptep[0] = cpu_to_be64(hpte[0]); asm volatile("ptesync" : : : "memory"); preempt_enable(); if (page && hpte_is_writable(r)) @@ -784,7 +786,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, return ret; out_unlock: - hptep[0] &= ~HPTE_V_HVLOCK; + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); preempt_enable(); goto out_put; } @@ -860,7 +862,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, { struct revmap_entry *rev = kvm->arch.revmap; unsigned long h, i, j; - unsigned long *hptep; + __be64 *hptep; unsigned long ptel, psize, rcbits; for (;;) { @@ -876,11 +878,11 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, * rmap chain lock. */ i = *rmapp & KVMPPC_RMAP_INDEX; - hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { /* unlock rmap before spinning on the HPTE lock */ unlock_rmap(rmapp); - while (hptep[0] & HPTE_V_HVLOCK) + while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK) cpu_relax(); continue; } @@ -899,14 +901,14 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, /* Now check and modify the HPTE */ ptel = rev[i].guest_rpte; - psize = hpte_page_size(hptep[0], ptel); - if ((hptep[0] & HPTE_V_VALID) && + psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel); + if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && hpte_rpn(ptel, psize) == gfn) { if (kvm->arch.using_mmu_notifiers) - hptep[0] |= HPTE_V_ABSENT; + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, i); /* Harvest R and C */ - rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); + rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; if (rcbits & ~rev[i].guest_rpte) { rev[i].guest_rpte = ptel | rcbits; @@ -914,7 +916,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, } } unlock_rmap(rmapp); - hptep[0] &= ~HPTE_V_HVLOCK; + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); } return 0; } @@ -961,7 +963,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, { struct revmap_entry *rev = kvm->arch.revmap; unsigned long head, i, j; - unsigned long *hptep; + __be64 *hptep; int ret = 0; retry: @@ -977,23 +979,24 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, i = head = *rmapp & KVMPPC_RMAP_INDEX; do { - hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); j = rev[i].forw; /* If this HPTE isn't referenced, ignore it */ - if (!(hptep[1] & HPTE_R_R)) + if (!(be64_to_cpu(hptep[1]) & HPTE_R_R)) continue; if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { /* unlock rmap before spinning on the HPTE lock */ unlock_rmap(rmapp); - while (hptep[0] & HPTE_V_HVLOCK) + while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK) cpu_relax(); goto retry; } /* Now check and modify the HPTE */ - if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { + if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && + (be64_to_cpu(hptep[1]) & HPTE_R_R)) { kvmppc_clear_ref_hpte(kvm, hptep, i); if (!(rev[i].guest_rpte & HPTE_R_R)) { rev[i].guest_rpte |= HPTE_R_R; @@ -1001,7 +1004,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, } ret = 1; } - hptep[0] &= ~HPTE_V_HVLOCK; + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); } while ((i = j) != head); unlock_rmap(rmapp); @@ -1035,7 +1038,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, do { hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4)); j = rev[i].forw; - 
if (hp[1] & HPTE_R_R) + if (be64_to_cpu(hp[1]) & HPTE_R_R) goto out; } while ((i = j) != head); } @@ -1075,7 +1078,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) unsigned long head, i, j; unsigned long n; unsigned long v, r; - unsigned long *hptep; + __be64 *hptep; int npages_dirty = 0; retry: @@ -1091,7 +1094,8 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) i = head = *rmapp & KVMPPC_RMAP_INDEX; do { - hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + unsigned long hptep1; + hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); j = rev[i].forw; /* @@ -1108,29 +1112,30 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) * Otherwise we need to do the tlbie even if C==0 in * order to pick up any delayed writeback of C. */ - if (!(hptep[1] & HPTE_R_C) && - (!hpte_is_writable(hptep[1]) || vcpus_running(kvm))) + hptep1 = be64_to_cpu(hptep[1]); + if (!(hptep1 & HPTE_R_C) && + (!hpte_is_writable(hptep1) || vcpus_running(kvm))) continue; if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { /* unlock rmap before spinning on the HPTE lock */ unlock_rmap(rmapp); - while (hptep[0] & HPTE_V_HVLOCK) + while (hptep[0] & cpu_to_be64(HPTE_V_HVLOCK)) cpu_relax(); goto retry; } /* Now check and modify the HPTE */ - if (!(hptep[0] & HPTE_V_VALID)) + if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) continue; /* need to make it temporarily absent so C is stable */ - hptep[0] |= HPTE_V_ABSENT; + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, i); - v = hptep[0]; - r = hptep[1]; + v = be64_to_cpu(hptep[0]); + r = be64_to_cpu(hptep[1]); if (r & HPTE_R_C) { - hptep[1] = r & ~HPTE_R_C; + hptep[1] = cpu_to_be64(r & ~HPTE_R_C); if (!(rev[i].guest_rpte & HPTE_R_C)) { rev[i].guest_rpte |= HPTE_R_C; note_hpte_modification(kvm, &rev[i]); @@ -1143,7 +1148,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) } v &= ~(HPTE_V_ABSENT | HPTE_V_HVLOCK); v |= HPTE_V_VALID; - hptep[0] = v; + hptep[0] = cpu_to_be64(v); } while ((i = j) != head); unlock_rmap(rmapp); @@ -1307,7 +1312,7 @@ struct kvm_htab_ctx { * Returns 1 if this HPT entry has been modified or has pending * R/C bit changes. 
*/ -static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp) +static int hpte_dirty(struct revmap_entry *revp, __be64 *hptp) { unsigned long rcbits_unset; @@ -1316,13 +1321,14 @@ static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp) /* Also need to consider changes in reference and changed bits */ rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); - if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset)) + if ((be64_to_cpu(hptp[0]) & HPTE_V_VALID) && + (be64_to_cpu(hptp[1]) & rcbits_unset)) return 1; return 0; } -static long record_hpte(unsigned long flags, unsigned long *hptp, +static long record_hpte(unsigned long flags, __be64 *hptp, unsigned long *hpte, struct revmap_entry *revp, int want_valid, int first_pass) { @@ -1337,10 +1343,10 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, return 0; valid = 0; - if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) { + if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) { valid = 1; if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && - !(hptp[0] & HPTE_V_BOLTED)) + !(be64_to_cpu(hptp[0]) & HPTE_V_BOLTED)) valid = 0; } if (valid != want_valid) @@ -1352,7 +1358,7 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, preempt_disable(); while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) cpu_relax(); - v = hptp[0]; + v = be64_to_cpu(hptp[0]); /* re-evaluate valid and dirty from synchronized HPTE value */ valid = !!(v & HPTE_V_VALID); @@ -1360,9 +1366,9 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, /* Harvest R and C into guest view if necessary */ rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); - if (valid && (rcbits_unset & hptp[1])) { - revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) | - HPTE_GR_MODIFIED; + if (valid && (rcbits_unset & be64_to_cpu(hptp[1]))) { + revp->guest_rpte |= (be64_to_cpu(hptp[1]) & + (HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED; dirty = 1; } @@ -1381,13 +1387,13 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, revp->guest_rpte = r; } asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); - hptp[0] &= ~HPTE_V_HVLOCK; + hptp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); preempt_enable(); if (!(valid == want_valid && (first_pass || dirty))) ok = 0; } - hpte[0] = v; - hpte[1] = r; + hpte[0] = cpu_to_be64(v); + hpte[1] = cpu_to_be64(r); return ok; } @@ -1397,7 +1403,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, struct kvm_htab_ctx *ctx = file->private_data; struct kvm *kvm = ctx->kvm; struct kvm_get_htab_header hdr; - unsigned long *hptp; + __be64 *hptp; struct revmap_entry *revp; unsigned long i, nb, nw; unsigned long __user *lbuf; @@ -1413,7 +1419,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, flags = ctx->flags; i = ctx->index; - hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); revp = kvm->arch.revmap + i; lbuf = (unsigned long __user *)buf; @@ -1497,7 +1503,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, unsigned long i, j; unsigned long v, r; unsigned long __user *lbuf; - unsigned long *hptp; + __be64 *hptp; unsigned long tmp[2]; ssize_t nb; long int err, ret; @@ -1539,7 +1545,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte) break; - hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); lbuf = (unsigned long __user *)buf; for 
(j = 0; j < hdr.n_valid; ++j) { err = -EFAULT; @@ -1551,7 +1557,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, lbuf += 2; nb += HPTE_SIZE; - if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) kvmppc_do_h_remove(kvm, 0, i, 0, tmp); err = -EIO; ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r, @@ -1577,7 +1583,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, } for (j = 0; j < hdr.n_invalid; ++j) { - if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) kvmppc_do_h_remove(kvm, 0, i, 0, tmp); ++i; hptp += 2; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 6e62243..e5c6063 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -154,10 +154,10 @@ static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva, return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift); } -static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v) +static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v) { asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); - hpte[0] = hpte_v; + hpte[0] = cpu_to_be64(hpte_v); } long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, @@ -166,7 +166,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, { unsigned long i, pa, gpa, gfn, psize; unsigned long slot_fn, hva; - unsigned long *hpte; + __be64 *hpte; struct revmap_entry *rev; unsigned long g_ptel; struct kvm_memory_slot *memslot; @@ -275,9 +275,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, return H_PARAMETER; if (likely((flags & H_EXACT) == 0)) { pte_index &= ~7UL; - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); for (i = 0; i < 8; ++i) { - if ((*hpte & HPTE_V_VALID) == 0 && + if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 && try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | HPTE_V_ABSENT)) break; @@ -292,11 +292,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, */ hpte -= 16; for (i = 0; i < 8; ++i) { + u64 pte; while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if (!(*hpte & (HPTE_V_VALID | HPTE_V_ABSENT))) + pte = be64_to_cpu(*hpte); + if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT))) break; - *hpte &= ~HPTE_V_HVLOCK; + *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK); hpte += 2; } if (i == 8) @@ -304,14 +306,17 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } pte_index += i; } else { - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | HPTE_V_ABSENT)) { /* Lock the slot and check again */ + u64 pte; + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if (*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)) { - *hpte &= ~HPTE_V_HVLOCK; + pte = be64_to_cpu(*hpte); + if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) { + *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK); return H_PTEG_FULL; } } @@ -347,11 +352,11 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } } - hpte[1] = ptel; + hpte[1] = cpu_to_be64(ptel); /* Write the first HPTE dword, unlocking the HPTE and making it valid */ eieio(); - hpte[0] = pteh; + hpte[0] = cpu_to_be64(pteh); asm volatile("ptesync" : : : "memory"); *pte_idx_ret = pte_index; @@ -468,30 +473,35 @@ long kvmppc_do_h_remove(struct kvm *kvm, 
unsigned long flags, unsigned long pte_index, unsigned long avpn, unsigned long *hpret) { - unsigned long *hpte; + __be64 *hpte; unsigned long v, r, rb; struct revmap_entry *rev; + u64 pte; if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || - ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) || - ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) { - hpte[0] &= ~HPTE_V_HVLOCK; + pte = be64_to_cpu(hpte[0]); + if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || + ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) || + ((flags & H_ANDCOND) && (pte & avpn) != 0)) { + hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); return H_NOT_FOUND; } rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); - v = hpte[0] & ~HPTE_V_HVLOCK; + v = pte & ~HPTE_V_HVLOCK; if (v & HPTE_V_VALID) { - hpte[0] &= ~HPTE_V_VALID; - rb = compute_tlbie_rb(v, hpte[1], pte_index); + u64 pte1; + + pte1 = be64_to_cpu(hpte[1]); + hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); + rb = compute_tlbie_rb(v, pte1, pte_index); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* Read PTE low word after tlbie to get final R/C values */ - remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]); + remove_revmap_chain(kvm, pte_index, rev, v, pte1); } r = rev->guest_rpte & ~HPTE_GR_RESERVED; note_hpte_modification(kvm, rev); @@ -514,12 +524,14 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; unsigned long *args = &vcpu->arch.gpr[4]; - unsigned long *hp, *hptes[4], tlbrb[4]; + __be64 *hp, *hptes[4]; + unsigned long tlbrb[4]; long int i, j, k, n, found, indexes[4]; unsigned long flags, req, pte_index, rcbits; int global; long int ret = H_SUCCESS; struct revmap_entry *rev, *revs[4]; + u64 hp0; global = global_invalidates(kvm, 0); for (i = 0; i < 4 && ret == H_SUCCESS; ) { @@ -542,8 +554,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) ret = H_PARAMETER; break; } - hp = (unsigned long *) - (kvm->arch.hpt_virt + (pte_index << 4)); + hp = (__be64 *) (kvm->arch.hpt_virt + (pte_index << 4)); /* to avoid deadlock, don't spin except for first */ if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) { if (n) @@ -552,23 +563,24 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) cpu_relax(); } found = 0; - if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) { + hp0 = be64_to_cpu(hp[0]); + if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) { switch (flags & 3) { case 0: /* absolute */ found = 1; break; case 1: /* andcond */ - if (!(hp[0] & args[j + 1])) + if (!(hp0 & args[j + 1])) found = 1; break; case 2: /* AVPN */ - if ((hp[0] & ~0x7fUL) == args[j + 1]) + if ((hp0 & ~0x7fUL) == args[j + 1]) found = 1; break; } } if (!found) { - hp[0] &= ~HPTE_V_HVLOCK; + hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); args[j] = ((0x90 | flags) << 56) + pte_index; continue; } @@ -577,7 +589,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); note_hpte_modification(kvm, rev); - if (!(hp[0] & HPTE_V_VALID)) { + if (!(hp0 & HPTE_V_VALID)) { /* insert R and C bits from PTE */ rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); args[j] |= rcbits << (56 - 5); @@ -585,8 +597,10 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) continue; } - hp[0] &= ~HPTE_V_VALID; /* leave it locked */ - tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index); + /* leave it locked */ + hp[0] &= 
~cpu_to_be64(HPTE_V_VALID); + tlbrb[n] = compute_tlbie_rb(be64_to_cpu(hp[0]), + be64_to_cpu(hp[1]), pte_index); indexes[n] = j; hptes[n] = hp; revs[n] = rev; @@ -605,7 +619,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) pte_index = args[j] & ((1ul << 56) - 1); hp = hptes[k]; rev = revs[k]; - remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]); + remove_revmap_chain(kvm, pte_index, rev, + be64_to_cpu(hp[0]), be64_to_cpu(hp[1])); rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); args[j] |= rcbits << (56 - 5); hp[0] = 0; @@ -620,23 +635,25 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long va) { struct kvm *kvm = vcpu->kvm; - unsigned long *hpte; + __be64 *hpte; struct revmap_entry *rev; unsigned long v, r, rb, mask, bits; + u64 pte; if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || - ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) { - hpte[0] &= ~HPTE_V_HVLOCK; + pte = be64_to_cpu(hpte[0]); + if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || + ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) { + hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); return H_NOT_FOUND; } - v = hpte[0]; + v = pte; bits = (flags << 55) & HPTE_R_PP0; bits |= (flags << 48) & HPTE_R_KEY_HI; bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); @@ -650,12 +667,12 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, rev->guest_rpte = r; note_hpte_modification(kvm, rev); } - r = (hpte[1] & ~mask) | bits; + r = (be64_to_cpu(hpte[1]) & ~mask) | bits; /* Update HPTE */ if (v & HPTE_V_VALID) { rb = compute_tlbie_rb(v, r, pte_index); - hpte[0] = v & ~HPTE_V_VALID; + hpte[0] = cpu_to_be64(v & ~HPTE_V_VALID); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* * If the host has this page as readonly but the guest @@ -681,9 +698,9 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, } } } - hpte[1] = r; + hpte[1] = cpu_to_be64(r); eieio(); - hpte[0] = v & ~HPTE_V_HVLOCK; + hpte[0] = cpu_to_be64(v & ~HPTE_V_HVLOCK); asm volatile("ptesync" : : : "memory"); return H_SUCCESS; } @@ -692,7 +709,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long pte_index) { struct kvm *kvm = vcpu->kvm; - unsigned long *hpte, v, r; + __be64 *hpte; + unsigned long v, r; int i, n = 1; struct revmap_entry *rev = NULL; @@ -704,9 +722,9 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, } rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); for (i = 0; i < n; ++i, ++pte_index) { - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); - v = hpte[0] & ~HPTE_V_HVLOCK; - r = hpte[1]; + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); + v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; + r = be64_to_cpu(hpte[1]); if (v & HPTE_V_ABSENT) { v &= ~HPTE_V_ABSENT; v |= HPTE_V_VALID; @@ -721,25 +739,27 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, return H_SUCCESS; } -void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, +void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index) { unsigned long rb; - hptep[0] &= ~HPTE_V_VALID; - rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); + hptep[0] &= ~cpu_to_be64(HPTE_V_VALID); + rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]), + pte_index); do_tlbies(kvm, &rb, 1, 
1, true); } EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); -void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, +void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index) { unsigned long rb; unsigned char rbyte; - rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); - rbyte = (hptep[1] & ~HPTE_R_R) >> 8; + rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]), + pte_index); + rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8; /* modify only the second-last byte, which contains the ref bit */ *((char *)hptep + 14) = rbyte; do_tlbies(kvm, &rb, 1, 1, false); @@ -765,7 +785,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, unsigned long somask; unsigned long vsid, hash; unsigned long avpn; - unsigned long *hpte; + __be64 *hpte; unsigned long mask, val; unsigned long v, r; @@ -797,11 +817,11 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, val |= avpn; for (;;) { - hpte = (unsigned long *)(kvm->arch.hpt_virt + (hash << 7)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (hash << 7)); for (i = 0; i < 16; i += 2) { /* Read the PTE racily */ - v = hpte[i] & ~HPTE_V_HVLOCK; + v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; /* Check valid/absent, hash, segment size and AVPN */ if (!(v & valid) || (v & mask) != val) @@ -810,8 +830,8 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, /* Lock the PTE and read it under the lock */ while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK)) cpu_relax(); - v = hpte[i] & ~HPTE_V_HVLOCK; - r = hpte[i+1]; + v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; + r = be64_to_cpu(hpte[i+1]); /* * Check the HPTE again, including large page size @@ -825,7 +845,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, return (hash << 3) + (i >> 1); /* Unlock and move on */ - hpte[i] = v; + hpte[i] = cpu_to_be64(v); } if (val & HPTE_V_SECONDARY) @@ -854,7 +874,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, struct kvm *kvm = vcpu->kvm; long int index; unsigned long v, r, gr; - unsigned long *hpte; + __be64 *hpte; unsigned long valid; struct revmap_entry *rev; unsigned long pp, key; @@ -870,9 +890,9 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, return status; /* there really was no HPTE */ return 0; /* for prot fault, HPTE disappeared */ } - hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); - v = hpte[0] & ~HPTE_V_HVLOCK; - r = hpte[1]; + hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); + v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; + r = be64_to_cpu(hpte[1]); rev = real_vmalloc_addr(&kvm->arch.revmap[index]); gr = rev->guest_rpte; -- cgit v0.10.2 From 02407552256111479fbfd23a3e01218b399aaa35 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 10:34:19 +0200 Subject: KVM: PPC: Book3S HV: Access guest VPA in BE There are a few shared data structures between the host and the guest. Most of them get registered through the VPA interface. These data structures are defined to always be in big endian byte order, so let's make sure we always access them in big endian. 
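For illustration, the access pattern this enforces looks roughly like the
sketch below (helper names invented here; the conversion is a byte swap on
little endian hosts and a no-op on big endian hosts):

	/* sketch only: read and bump a big endian guest-shared field */
	static inline u32 vpa_yield_count(struct lppaca *vpa)
	{
		return be32_to_cpu(vpa->yield_count);
	}

	static inline void vpa_bump_yield_count(struct lppaca *vpa)
	{
		vpa->yield_count = cpu_to_be32(vpa_yield_count(vpa) + 1);
	}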
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7db9df2..f1281c4 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -272,7 +272,7 @@ struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa) { vpa->__old_status |= LPPACA_OLD_SHARED_PROC; - vpa->yield_count = 1; + vpa->yield_count = cpu_to_be32(1); } static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v, @@ -295,8 +295,8 @@ static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v, struct reg_vpa { u32 dummy; union { - u16 hword; - u32 word; + __be16 hword; + __be32 word; } length; }; @@ -335,9 +335,9 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, if (va == NULL) return H_PARAMETER; if (subfunc == H_VPA_REG_VPA) - len = ((struct reg_vpa *)va)->length.hword; + len = be16_to_cpu(((struct reg_vpa *)va)->length.hword); else - len = ((struct reg_vpa *)va)->length.word; + len = be32_to_cpu(((struct reg_vpa *)va)->length.word); kvmppc_unpin_guest_page(kvm, va, vpa, false); /* Check length */ @@ -542,18 +542,18 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, return; memset(dt, 0, sizeof(struct dtl_entry)); dt->dispatch_reason = 7; - dt->processor_id = vc->pcpu + vcpu->arch.ptid; - dt->timebase = now + vc->tb_offset; - dt->enqueue_to_dispatch_time = stolen; - dt->srr0 = kvmppc_get_pc(vcpu); - dt->srr1 = vcpu->arch.shregs.msr; + dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid); + dt->timebase = cpu_to_be64(now + vc->tb_offset); + dt->enqueue_to_dispatch_time = cpu_to_be32(stolen); + dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu)); + dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr); ++dt; if (dt == vcpu->arch.dtl.pinned_end) dt = vcpu->arch.dtl.pinned_addr; vcpu->arch.dtl_ptr = dt; /* order writing *dt vs. writing vpa->dtl_idx */ smp_wmb(); - vpa->dtl_idx = ++vcpu->arch.dtl_index; + vpa->dtl_idx = cpu_to_be64(++vcpu->arch.dtl_index); vcpu->arch.dtl.dirty = true; } diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 3a5c568..d562c8e 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -45,14 +45,14 @@ static void reload_slb(struct kvm_vcpu *vcpu) return; /* Sanity check */ - n = min_t(u32, slb->persistent, SLB_MIN_SIZE); + n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE); if ((void *) &slb->save_area[n] > vcpu->arch.slb_shadow.pinned_end) return; /* Load up the SLB from that */ for (i = 0; i < n; ++i) { - unsigned long rb = slb->save_area[i].esid; - unsigned long rs = slb->save_area[i].vsid; + unsigned long rb = be64_to_cpu(slb->save_area[i].esid); + unsigned long rs = be64_to_cpu(slb->save_area[i].vsid); rb = (rb & ~0xFFFul) | i; /* insert entry number */ asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb)); -- cgit v0.10.2 From 0865a583a4881975cc4b621f4886c02f01600302 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 10:36:17 +0200 Subject: KVM: PPC: Book3S HV: Access host lppaca and shadow slb in BE Some data structures are always stored in big endian. Among those are the LPPACA fields as well as the shadow slb. These structures might be shared with a hypervisor. So whenever we access those fields, make sure we do so in big endian byte order. 
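The LWZX_BE/STWX_BE/LDX_BE helpers used in the assembly below keep the
in-memory image big endian regardless of host endianness. Approximately
(the kernel's real definitions live in asm-compat.h and differ in how they
are stringified for the assembler):

	#ifdef __LITTLE_ENDIAN__
	#define LWZX_BE	lwbrx	/* byte-reversed indexed word load       */
	#define STWX_BE	stwbrx	/* byte-reversed indexed word store      */
	#define LDX_BE	ldbrx	/* byte-reversed indexed doubleword load */
	#else
	#define LWZX_BE	lwzx
	#define STWX_BE	stwx
	#define LDX_BE	ldx
	#endif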
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index e66c1e38..bf5270e 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -32,10 +32,6 @@ #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) -#ifdef __LITTLE_ENDIAN__ -#error Need to fix lppaca and SLB shadow accesses in little endian mode -#endif - /* Values in HSTATE_NAPPING(r13) */ #define NAPPING_CEDE 1 #define NAPPING_NOVCPU 2 @@ -595,9 +591,10 @@ kvmppc_got_guest: ld r3, VCPU_VPA(r4) cmpdi r3, 0 beq 25f - lwz r5, LPPACA_YIELDCOUNT(r3) + li r6, LPPACA_YIELDCOUNT + LWZX_BE r5, r3, r6 addi r5, r5, 1 - stw r5, LPPACA_YIELDCOUNT(r3) + STWX_BE r5, r3, r6 li r6, 1 stb r6, VCPU_VPA_DIRTY(r4) 25: @@ -1442,9 +1439,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) ld r8, VCPU_VPA(r9) /* do they have a VPA? */ cmpdi r8, 0 beq 25f - lwz r3, LPPACA_YIELDCOUNT(r8) + li r4, LPPACA_YIELDCOUNT + LWZX_BE r3, r8, r4 addi r3, r3, 1 - stw r3, LPPACA_YIELDCOUNT(r8) + STWX_BE r3, r8, r4 li r3, 1 stb r3, VCPU_VPA_DIRTY(r9) 25: @@ -1757,8 +1755,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 33: ld r8,PACA_SLBSHADOWPTR(r13) .rept SLB_NUM_BOLTED - ld r5,SLBSHADOW_SAVEAREA(r8) - ld r6,SLBSHADOW_SAVEAREA+8(r8) + li r3, SLBSHADOW_SAVEAREA + LDX_BE r5, r8, r3 + addi r3, r3, 8 + LDX_BE r6, r8, r3 andis. r7,r5,SLB_ESID_V@h beq 1f slbmte r6,r5 -- cgit v0.10.2 From 76d072fb05f646eb180f161bbe06ab185af52f38 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 10:37:52 +0200 Subject: KVM: PPC: Book3S HV: Access XICS in BE On the exit path from the guest we check what type of interrupt we received if we received one. This means we're doing hardware access to the XICS interrupt controller. However, when running on a little endian system, this access is byte reversed. So let's make sure to swizzle the bytes back again and virtually make XICS accesses big endian. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index bf5270e..364ca0c 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2350,7 +2350,18 @@ kvmppc_read_intr: cmpdi r6, 0 beq- 1f lwzcix r0, r6, r7 - rlwinm. r3, r0, 0, 0xffffff + /* + * Save XIRR for later. Since we get in in reverse endian on LE + * systems, save it byte reversed and fetch it back in host endian. + */ + li r3, HSTATE_SAVED_XIRR + STWX_BE r0, r3, r13 +#ifdef __LITTLE_ENDIAN__ + lwz r3, HSTATE_SAVED_XIRR(r13) +#else + mr r3, r0 +#endif + rlwinm. r3, r3, 0, 0xffffff sync beq 1f /* if nothing pending in the ICP */ @@ -2382,10 +2393,9 @@ kvmppc_read_intr: li r3, -1 1: blr -42: /* It's not an IPI and it's for the host, stash it in the PACA - * before exit, it will be picked up by the host ICP driver +42: /* It's not an IPI and it's for the host. We saved a copy of XIRR in + * the PACA earlier, it will be picked up by the host ICP driver */ - stw r0, HSTATE_SAVED_XIRR(r13) li r3, 1 b 1b -- cgit v0.10.2 From 9bf163f86d0dc2f9070d9b1b8c27cedcf8eec816 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 16 Jun 2014 14:41:15 +0200 Subject: KVM: PPC: Book3S HV: Fix ABIv2 on LE For code that doesn't live in modules we can just branch to the real function names, giving us compatibility with ABIv1 and ABIv2. Do this for the compiled-in code of HV KVM. 
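Some background on why this works: the big endian ELFv1 ABI gives every C
function a descriptor symbol (load_fp_state) plus a code entry symbol
(.load_fp_state), while ELFv2, used on little endian, has no dot symbols at
all. For code linked into the kernel image the plain name should be safe on
both ABIs, because the ppc64 linker redirects a branch to a function
descriptor symbol to the function's entry point; module code cannot rely on
that, which is why this change is limited to the compiled-in parts.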
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 364ca0c..855521e 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -668,9 +668,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) mr r31, r4 addi r3, r31, VCPU_FPRS_TM - bl .load_fp_state + bl load_fp_state addi r3, r31, VCPU_VRS_TM - bl .load_vr_state + bl load_vr_state mr r4, r31 lwz r7, VCPU_VRSAVE_TM(r4) mtspr SPRN_VRSAVE, r7 @@ -1414,9 +1414,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) /* Save FP/VSX. */ addi r3, r9, VCPU_FPRS_TM - bl .store_fp_state + bl store_fp_state addi r3, r9, VCPU_VRS_TM - bl .store_vr_state + bl store_vr_state mfspr r6, SPRN_VRSAVE stw r6, VCPU_VRSAVE_TM(r9) 1: @@ -2430,11 +2430,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) mtmsrd r8 isync addi r3,r3,VCPU_FPRS - bl .store_fp_state + bl store_fp_state #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION addi r3,r31,VCPU_VRS - bl .store_vr_state + bl store_vr_state END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif mfspr r6,SPRN_VRSAVE @@ -2466,11 +2466,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) mtmsrd r8 isync addi r3,r4,VCPU_FPRS - bl .load_fp_state + bl load_fp_state #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION addi r3,r31,VCPU_VRS - bl .load_vr_state + bl load_vr_state END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif lwz r7,VCPU_VRSAVE(r31) -- cgit v0.10.2 From 6947f948f06128409b94306afaca5ece873ee5a2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 10:39:38 +0200 Subject: KVM: PPC: Book3S HV: Enable for little endian hosts Now that we've fixed all the issues that HV KVM code had on little endian hosts, we can enable it in the kernel configuration for users to play with. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index d6a53b9..8aeeda1 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -75,7 +75,6 @@ config KVM_BOOK3S_64 config KVM_BOOK3S_64_HV tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host" depends on KVM_BOOK3S_64 - depends on !CPU_LITTLE_ENDIAN select KVM_BOOK3S_HV_POSSIBLE select MMU_NOTIFIER select CMA -- cgit v0.10.2 From debf27d6b92d7a98e0153ca8e3a990ea7a45b4da Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Fri, 4 Jul 2014 11:17:28 +0300 Subject: KVM: PPC: e500: Emulate power management control SPR For FSL e6500 core the kernel uses power management SPR register (PWRMGTCR0) to enable idle power down for cores and devices by setting up the idle count period at boot time. With the host already controlling the power management configuration the guest could simply benefit from it, so emulate guest request as a general store. 
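The net effect is that PWRMGTCR0 becomes plain vcpu-local state; a sketch
of the two emulation paths added below (the host's real register is never
written):

	vcpu->arch.pwrmgtcr0 = spr_val;		/* guest mtspr SPRN_PWRMGTCR0 */
	*spr_val = vcpu->arch.pwrmgtcr0;	/* guest mfspr SPRN_PWRMGTCR0 */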
Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 62b2cee..faf2f0e 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -584,6 +584,7 @@ struct kvm_vcpu_arch { u32 mmucfg; u32 eptcfg; u32 epr; + u32 pwrmgtcr0; u32 crit_save; /* guest debug registers*/ struct debug_reg dbg_reg; diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 002d517..c99c40e 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -250,6 +250,14 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va spr_val); break; + case SPRN_PWRMGTCR0: + /* + * Guest relies on host power management configurations + * Treat the request as a general store + */ + vcpu->arch.pwrmgtcr0 = spr_val; + break; + /* extra exceptions */ case SPRN_IVOR32: vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val; @@ -368,6 +376,10 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v *spr_val = vcpu->arch.eptcfg; break; + case SPRN_PWRMGTCR0: + *spr_val = vcpu->arch.pwrmgtcr0; + break; + /* extra exceptions */ case SPRN_IVOR32: *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; -- cgit v0.10.2 From 1287cb3fa85cd4a0d18402f6a23e1d4c6a9d7b8b Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 4 Jul 2014 12:52:51 +0200 Subject: KVM: PPC: Book3S: Move vcore definition to end of kvm_arch struct When building KVM with a lot of vcores (NR_CPUS is big), we can potentially get out of the ld immediate range for dereferences inside that struct. Move the array to the end of our kvm_arch struct. This fixes compilation issues with NR_CPUS=2048 for me. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index faf2f0e..855ba4d 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -255,7 +255,6 @@ struct kvm_arch { atomic_t hpte_mod_interest; spinlock_t slot_phys_lock; cpumask_t need_tlb_flush; - struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; int hpt_cma_alloc; #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE @@ -273,6 +272,10 @@ struct kvm_arch { struct kvmppc_xics *xics; #endif struct kvmppc_ops *kvm_ops; +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* This array can grow quite large, keep it at the end */ + struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; +#endif }; /* -- cgit v0.10.2 From 17824b5afcf273f6fc3e04df2d2a9d90d5c864fd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 10 Jul 2014 19:19:35 +0200 Subject: KVM: PPC: Deflect page write faults properly in kvmppc_st When we have a page that we're not allowed to write to, xlate() will already tell us -EPERM on lookup of that page. With the code as is we change it into a "page missing" error which a guest may get confused about. Instead, just tell the caller about the -EPERM directly. This fixes Mac OS X guests when run with DCBZ32 emulation. 
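With this change a caller can tell the two failures apart. A hypothetical
user of kvmppc_st() (the handle_* names are invented for illustration):

	int r = kvmppc_st(vcpu, &eaddr, sizeof(u32), &val, true);
	if (r == -EPERM)
		handle_write_protected_page(vcpu);	/* mapped, but read-only */
	else if (r == -ENOENT)
		handle_unmapped_page(vcpu);		/* no translation at all */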
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index bd75902..9624c56 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -418,11 +418,13 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) { struct kvmppc_pte pte; + int r; vcpu->stat.st++; - if (kvmppc_xlate(vcpu, *eaddr, data, true, &pte)) - return -ENOENT; + r = kvmppc_xlate(vcpu, *eaddr, data, true, &pte); + if (r < 0) + return r; *eaddr = pte.raddr; -- cgit v0.10.2 From 2e27ecc961044a2c5c05a4283888352961886a87 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 10 Jul 2014 19:22:03 +0200 Subject: KVM: PPC: Book3S: Stop PTE lookup on write errors When a page lookup failed because we're not allowed to write to the page, we should not overwrite that value with another lookup on the second PTEG which will return "page not found". Instead, we should just tell the caller that we had a permission problem. This fixes Mac OS X guests looping endlessly in page lookup code for me. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 93503bb..cd0b073 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -335,7 +335,7 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, if (r < 0) r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, iswrite, true); - if (r < 0) + if (r == -ENOENT) r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, iswrite, false); -- cgit v0.10.2 From c01e3f66cd5cdc1f727f4c7b0c10b3e3bdb91ba7 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 11 Jul 2014 02:58:58 +0200 Subject: KVM: PPC: Book3S: Add hack for split real mode Today we handle split real mode by mapping both instruction and data faults into a special virtual address space that only exists during the split mode phase. This is good enough to catch 32bit Linux guests that use split real mode for copy_from/to_user. In this case we're always prefixed with 0xc0000000 for our instruction pointer and can map the user space process freely below there. However, that approach fails when we're running KVM inside of KVM. Here the 1st level last_inst reader may well be in the same virtual page as a 2nd level interrupt handler. It also fails when running Mac OS X guests. Here we have a 4G/4G split, so a kernel copy_from/to_user implementation can easily overlap with user space addresses. The architecturally correct way to fix this would be to implement an instruction interpreter in KVM that kicks in whenever we go into split real mode. This interpreter however would not receive a great amount of testing and be a lot of bloat for a reasonably isolated corner case. So I went back to the drawing board and tried to come up with a way to make split real mode work with a single flat address space. And then I realized that we could get away with the same trick that makes it work for Linux: Whenever we see an instruction address during split real mode that may collide, we just move it higher up the virtual address space to a place that hopefully does not collide (keep your fingers crossed!). That approach does work surprisingly well. I am able to successfully run Mac OS X guests with KVM and QEMU (no split real mode hacks like MOL) when I apply a tiny timing probe hack to QEMU. I'd say this is a win over even more broken split real mode :). 
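With SPLIT_HACK_MASK = 0xff000000 and SPLIT_HACK_OFFS = 0xfb000000 (both
defined below), the fixup is simple address arithmetic and is only applied
when the guest pc has a clear top byte. A worked example:

	ulong pc       = 0x00003f00;			/* guest pc in split real mode  */
	ulong fixed    = pc | SPLIT_HACK_OFFS;		/* 0xfb003f00, runs from up high */
	ulong restored = fixed & ~SPLIT_HACK_MASK;	/* 0x00003f00 again on unfixup  */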
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 9601741..3f3e530 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -131,6 +131,7 @@ #define BOOK3S_HFLAG_NATIVE_PS 0x8 #define BOOK3S_HFLAG_MULTI_PGSIZE 0x10 #define BOOK3S_HFLAG_NEW_TLBIE 0x20 +#define BOOK3S_HFLAG_SPLIT_HACK 0x40 #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 8ac5392..b1cf18d 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -324,4 +324,7 @@ static inline bool is_kvmppc_resume_guest(int r) /* LPIDs we support with this build -- runtime limit may be lower */ #define KVMPPC_NR_LPIDS (LPID_RSVD + 1) +#define SPLIT_HACK_MASK 0xff000000 +#define SPLIT_HACK_OFFS 0xfb000000 + #endif /* __ASM_KVM_BOOK3S_H__ */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 9624c56..1d13764 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -72,6 +72,17 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) { } +void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { + ulong pc = kvmppc_get_pc(vcpu); + if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) + kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); + vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; + } +} +EXPORT_SYMBOL_GPL(kvmppc_unfixup_split_real); + static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) { if (!is_kvmppc_hv_enabled(vcpu->kvm)) @@ -118,6 +129,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { + kvmppc_unfixup_split_real(vcpu); kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); kvmppc_set_srr1(vcpu, kvmppc_get_msr(vcpu) | flags); kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec); @@ -384,6 +396,13 @@ static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, pte->may_write = true; pte->may_execute = true; r = 0; + + if ((kvmppc_get_msr(vcpu) & (MSR_IR | MSR_DR)) == MSR_DR && + !data) { + if ((vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) && + ((eaddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)) + pte->raddr &= ~SPLIT_HACK_MASK; + } } return r; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 15fd6c2..6125f60 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -62,6 +62,35 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); #define HW_PAGE_SIZE PAGE_SIZE #endif +static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu) +{ + ulong msr = kvmppc_get_msr(vcpu); + return (msr & (MSR_IR|MSR_DR)) == MSR_DR; +} + +static void kvmppc_fixup_split_real(struct kvm_vcpu *vcpu) +{ + ulong msr = kvmppc_get_msr(vcpu); + ulong pc = kvmppc_get_pc(vcpu); + + /* We are in DR only split real mode */ + if ((msr & (MSR_IR|MSR_DR)) != MSR_DR) + return; + + /* We have not fixed up the guest already */ + if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) + return; + + /* The code is in fixupable address space */ + if (pc & SPLIT_HACK_MASK) + return; + + vcpu->arch.hflags |= BOOK3S_HFLAG_SPLIT_HACK; + kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS); +} + +void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu); + static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) { #ifdef 
CONFIG_PPC_BOOK3S_64 @@ -81,6 +110,9 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) #ifdef CONFIG_PPC_BOOK3S_32 current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu; #endif + + if (kvmppc_is_split_real(vcpu)) + kvmppc_fixup_split_real(vcpu); } static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) @@ -95,6 +127,9 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) svcpu_put(svcpu); #endif + if (kvmppc_is_split_real(vcpu)) + kvmppc_unfixup_split_real(vcpu); + kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); @@ -322,6 +357,11 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) } } + if (kvmppc_is_split_real(vcpu)) + kvmppc_fixup_split_real(vcpu); + else + kvmppc_unfixup_split_real(vcpu); + if ((kvmppc_get_msr(vcpu) & (MSR_PR|MSR_IR|MSR_DR)) != (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { kvmppc_mmu_flush_segments(vcpu); @@ -522,6 +562,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12)); break; case MSR_DR: + if (!data && + (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) && + ((pte.raddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)) + pte.raddr &= ~SPLIT_HACK_MASK; + /* fall through */ case MSR_IR: vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); @@ -886,6 +931,9 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, ulong shadow_srr1 = vcpu->arch.shadow_srr1; vcpu->stat.pf_instruc++; + if (kvmppc_is_split_real(vcpu)) + kvmppc_fixup_split_real(vcpu); + #ifdef CONFIG_PPC_BOOK3S_32 /* We set segments as unused segments when invalidating them. So * treat the respective fault as segment fault. */ -- cgit v0.10.2 From 89b68c96a24f6520c8815f88254c8e7d09aeb40e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 13 Jul 2014 16:37:12 +0200 Subject: KVM: PPC: Book3S: Make magic page properly 4k mappable The magic page is defined as a 4k page of per-vCPU data that is shared between the guest and the host to accelerate accesses to privileged registers. However, when the host is using 64k page size granularity we weren't quite as strict about that rule anymore. Instead, we partially treated all of the upper 64k as magic page and mapped only the uppermost 4k with the actual magic contents. This works well enough for Linux which doesn't use any memory in kernel space in the upper 64k, but Mac OS X got upset. So this patch makes magic page actually stay in a 4k range even on 64k page size hosts. This patch fixes magic page usage with Mac OS X (using MOL) on 64k PAGE_SIZE hosts for me. 
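To illustrate the constraint enforced below: with 64k host pages, bits
0xf000 of an address select the 4k slot inside a page, so the host's shared
struct has to sit in the same slot the guest chose for its magic page. A
worked example with made-up addresses:

	ulong mp_pa  = 0x0000f000;		/* guest magic page in slot 0xf000    */
	ulong shared = 0xc000000012342000UL;	/* host copy currently in slot 0x2000 */
	/* slots differ, so the 4k of contents are copied into the matching slot */
	ulong target = (shared & PAGE_MASK) | (mp_pa & 0xf000);	/* ...1234f000 */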
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index b1cf18d..20fb6f2 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -158,7 +158,7 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); -extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, +extern pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, bool *writable); extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, unsigned long *rmap, long pte_index, int realmode); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 1d13764..31facfc 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -354,18 +354,18 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter); -pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, +pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, bool *writable) { - ulong mp_pa = vcpu->arch.magic_page_pa; + ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM; + gfn_t gfn = gpa >> PAGE_SHIFT; if (!(kvmppc_get_msr(vcpu) & MSR_SF)) mp_pa = (uint32_t)mp_pa; /* Magic page override */ - if (unlikely(mp_pa) && - unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) == - ((mp_pa & PAGE_MASK) & KVM_PAM))) { + gpa &= ~0xFFFULL; + if (unlikely(mp_pa) && unlikely((gpa & KVM_PAM) == mp_pa)) { ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; pfn_t pfn; @@ -378,7 +378,7 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, return gfn_to_pfn_prot(vcpu->kvm, gfn, writing, writable); } -EXPORT_SYMBOL_GPL(kvmppc_gfn_to_pfn); +EXPORT_SYMBOL_GPL(kvmppc_gpa_to_pfn); static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, bool iswrite, struct kvmppc_pte *pte) diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 678e753..2035d16 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -156,11 +156,10 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, bool writable; /* Get host physical address for gpa */ - hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT, - iswrite, &writable); + hpaddr = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable); if (is_error_noslot_pfn(hpaddr)) { - printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", - orig_pte->eaddr); + printk(KERN_INFO "Couldn't get guest page for gpa %lx!\n", + orig_pte->raddr); r = -EINVAL; goto out; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 0ac9839..b982d92 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -104,9 +104,10 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, smp_rmb(); /* Get host physical address for gpa */ - pfn = kvmppc_gfn_to_pfn(vcpu, gfn, iswrite, &writable); + pfn = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable); if (is_error_noslot_pfn(pfn)) { - printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", gfn); + printk(KERN_INFO "Couldn't get guest page for gpa %lx!\n", + orig_pte->raddr); r = -EINVAL; goto out; } diff --git 
a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 6125f60..e40765f 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -511,19 +511,19 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) put_page(hpage); } -static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) +static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) { ulong mp_pa = vcpu->arch.magic_page_pa; if (!(kvmppc_get_msr(vcpu) & MSR_SF)) mp_pa = (uint32_t)mp_pa; - if (unlikely(mp_pa) && - unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) { + gpa &= ~0xFFFULL; + if (unlikely(mp_pa) && unlikely((mp_pa & KVM_PAM) == (gpa & KVM_PAM))) { return 1; } - return kvm_is_visible_gfn(vcpu->kvm, gfn); + return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT); } int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -614,7 +614,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); } else if (!is_mmio && - kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { + kvmppc_visible_gpa(vcpu, pte.raddr)) { if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) { /* * There is already a host HPTE there, presumably @@ -1387,8 +1387,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, p = __get_free_page(GFP_KERNEL|__GFP_ZERO); if (!p) goto uninit_vcpu; - /* the real shared page fills the last 4k of our page */ - vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096); + vcpu->arch.shared = (void *)p; #ifdef CONFIG_PPC_BOOK3S_64 /* Always start the shared struct in native endian mode */ #ifdef __BIG_ENDIAN__ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 7efc2b7..fe0257a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -190,6 +190,25 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) vcpu->arch.magic_page_pa = param1 & ~0xfffULL; vcpu->arch.magic_page_ea = param2 & ~0xfffULL; +#ifdef CONFIG_PPC_64K_PAGES + /* + * Make sure our 4k magic page is in the same window of a 64k + * page within the guest and within the host's page. + */ + if ((vcpu->arch.magic_page_pa & 0xf000) != + ((ulong)vcpu->arch.shared & 0xf000)) { + void *old_shared = vcpu->arch.shared; + ulong shared = (ulong)vcpu->arch.shared; + void *new_shared; + + shared &= PAGE_MASK; + shared |= vcpu->arch.magic_page_pa & 0xf000; + new_shared = (void*)shared; + memcpy(new_shared, old_shared, 0x1000); + vcpu->arch.shared = new_shared; + } +#endif + r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7; r = EV_SUCCESS; -- cgit v0.10.2 From 1dc0c5b88cae1c211b37fed9187379a692bb469b Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 17 Jul 2014 17:01:35 +0530 Subject: kvm: ppc: bookehv: Added wrapper macros for shadow registers There are shadow registers like, GSPRG[0-3], GSRR0, GSRR1 etc on BOOKE-HV and these shadow registers are guest accessible. So these shadow registers needs to be updated on BOOKE-HV. This patch adds new macro for get/set helper of shadow register . 
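As an example of what the new macros generate,
SHARED_SPRNG_WRAPPER(srr0, 64, SPRN_GSRR0) expands on BOOKE-HV to accessors
that go straight to the guest shadow register:

	static inline ulong kvmppc_get_srr0(struct kvm_vcpu *vcpu)
	{
		return mfspr(SPRN_GSRR0);
	}

	static inline void kvmppc_set_srr0(struct kvm_vcpu *vcpu, ulong val)
	{
		mtspr(SPRN_GSRR0, val);
	}

On non-HV configurations it falls back to the existing shared struct
accessors.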
Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index e2fd5a1..6520d09 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -472,8 +472,20 @@ static inline bool kvmppc_shared_big_endian(struct kvm_vcpu *vcpu) #endif } +#define SPRNG_WRAPPER_GET(reg, e500hv_spr) \ +static inline ulong kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ +{ \ + return mfspr(e500hv_spr); \ +} \ + +#define SPRNG_WRAPPER_SET(reg, e500hv_spr) \ +static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, ulong val) \ +{ \ + mtspr(e500hv_spr, val); \ +} \ + #define SHARED_WRAPPER_GET(reg, size) \ -static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ +static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ { \ if (kvmppc_shared_big_endian(vcpu)) \ return be##size##_to_cpu(vcpu->arch.shared->reg); \ @@ -494,14 +506,30 @@ static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val) \ SHARED_WRAPPER_GET(reg, size) \ SHARED_WRAPPER_SET(reg, size) \ +#define SPRNG_WRAPPER(reg, e500hv_spr) \ + SPRNG_WRAPPER_GET(reg, e500hv_spr) \ + SPRNG_WRAPPER_SET(reg, e500hv_spr) \ + +#ifdef CONFIG_KVM_BOOKE_HV + +#define SHARED_SPRNG_WRAPPER(reg, size, e500hv_spr) \ + SPRNG_WRAPPER(reg, e500hv_spr) \ + +#else + +#define SHARED_SPRNG_WRAPPER(reg, size, e500hv_spr) \ + SHARED_WRAPPER(reg, size) \ + +#endif + SHARED_WRAPPER(critical, 64) -SHARED_WRAPPER(sprg0, 64) -SHARED_WRAPPER(sprg1, 64) -SHARED_WRAPPER(sprg2, 64) -SHARED_WRAPPER(sprg3, 64) -SHARED_WRAPPER(srr0, 64) -SHARED_WRAPPER(srr1, 64) -SHARED_WRAPPER(dar, 64) +SHARED_SPRNG_WRAPPER(sprg0, 64, SPRN_GSPRG0) +SHARED_SPRNG_WRAPPER(sprg1, 64, SPRN_GSPRG1) +SHARED_SPRNG_WRAPPER(sprg2, 64, SPRN_GSPRG2) +SHARED_SPRNG_WRAPPER(sprg3, 64, SPRN_GSPRG3) +SHARED_SPRNG_WRAPPER(srr0, 64, SPRN_GSRR0) +SHARED_SPRNG_WRAPPER(srr1, 64, SPRN_GSRR1) +SHARED_SPRNG_WRAPPER(dar, 64, SPRN_GDEAR) SHARED_WRAPPER_GET(msr, 64) static inline void kvmppc_set_msr_fast(struct kvm_vcpu *vcpu, u64 val) { -- cgit v0.10.2 From 31579eea69c8088685a4dc82784ca839cfd5ae73 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 17 Jul 2014 17:01:36 +0530 Subject: kvm: ppc: booke: Use the shared struct helpers of SRR0 and SRR1 Use kvmppc_set_srr0/srr1() and kvmppc_get_srr0/srr1() helper functions Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index ab62109..3b43adb 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -266,13 +266,8 @@ static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu) static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) { -#ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_GSRR0, srr0); - mtspr(SPRN_GSRR1, srr1); -#else - vcpu->arch.shared->srr0 = srr0; - vcpu->arch.shared->srr1 = srr1; -#endif + kvmppc_set_srr0(vcpu, srr0); + kvmppc_set_srr1(vcpu, srr1); } static void set_guest_csrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) @@ -1265,8 +1260,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->lr = vcpu->arch.lr; regs->xer = kvmppc_get_xer(vcpu); regs->msr = vcpu->arch.shared->msr; - regs->srr0 = vcpu->arch.shared->srr0; - regs->srr1 = vcpu->arch.shared->srr1; + regs->srr0 = kvmppc_get_srr0(vcpu); + regs->srr1 = kvmppc_get_srr1(vcpu); regs->pid = vcpu->arch.pid; regs->sprg0 = vcpu->arch.shared->sprg0; regs->sprg1 = vcpu->arch.shared->sprg1; @@ -1293,8 +1288,8 @@ int 
kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu->arch.lr = regs->lr; kvmppc_set_xer(vcpu, regs->xer); kvmppc_set_msr(vcpu, regs->msr); - vcpu->arch.shared->srr0 = regs->srr0; - vcpu->arch.shared->srr1 = regs->srr1; + kvmppc_set_srr0(vcpu, regs->srr0); + kvmppc_set_srr1(vcpu, regs->srr1); kvmppc_set_pid(vcpu, regs->pid); vcpu->arch.shared->sprg0 = regs->sprg0; vcpu->arch.shared->sprg1 = regs->sprg1; -- cgit v0.10.2 From a5414d4b5ed8a2b10a1c8fe84f30ed4f19d0a35e Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 17 Jul 2014 17:01:37 +0530 Subject: kvm: ppc: booke: Use the shared struct helpers of SPRN_DEAR Uses kvmppc_set_dar() and kvmppc_get_dar() helper functions Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 3b43adb..8e8b14b 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -292,24 +292,6 @@ static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) vcpu->arch.mcsrr1 = srr1; } -static unsigned long get_guest_dear(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_KVM_BOOKE_HV - return mfspr(SPRN_GDEAR); -#else - return vcpu->arch.shared->dar; -#endif -} - -static void set_guest_dear(struct kvm_vcpu *vcpu, unsigned long dear) -{ -#ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_GDEAR, dear); -#else - vcpu->arch.shared->dar = dear; -#endif -} - static unsigned long get_guest_esr(struct kvm_vcpu *vcpu) { #ifdef CONFIG_KVM_BOOKE_HV @@ -447,7 +429,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, if (update_esr == true) set_guest_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) - set_guest_dear(vcpu, vcpu->arch.queued_dear); + kvmppc_set_dar(vcpu, vcpu->arch.queued_dear); if (update_epr == true) { if (vcpu->arch.epr_flags & KVMPPC_EPR_USER) kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); @@ -1317,7 +1299,7 @@ static void get_sregs_base(struct kvm_vcpu *vcpu, sregs->u.e.csrr1 = vcpu->arch.csrr1; sregs->u.e.mcsr = vcpu->arch.mcsr; sregs->u.e.esr = get_guest_esr(vcpu); - sregs->u.e.dear = get_guest_dear(vcpu); + sregs->u.e.dear = kvmppc_get_dar(vcpu); sregs->u.e.tsr = vcpu->arch.tsr; sregs->u.e.tcr = vcpu->arch.tcr; sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); @@ -1335,7 +1317,7 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, vcpu->arch.csrr1 = sregs->u.e.csrr1; vcpu->arch.mcsr = sregs->u.e.mcsr; set_guest_esr(vcpu, sregs->u.e.esr); - set_guest_dear(vcpu, sregs->u.e.dear); + kvmppc_set_dar(vcpu, sregs->u.e.dear); vcpu->arch.vrsave = sregs->u.e.vrsave; kvmppc_set_tcr(vcpu, sregs->u.e.tcr); -- cgit v0.10.2 From dc168549d9a0fb55d3021ee408adf25786cfda23 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 17 Jul 2014 17:01:38 +0530 Subject: kvm: ppc: booke: Add shared struct helpers of SPRN_ESR Add and use kvmppc_set_esr() and kvmppc_get_esr() helper functions Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 6520d09..c95bdbd 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -530,6 +530,7 @@ SHARED_SPRNG_WRAPPER(sprg3, 64, SPRN_GSPRG3) SHARED_SPRNG_WRAPPER(srr0, 64, SPRN_GSRR0) SHARED_SPRNG_WRAPPER(srr1, 64, SPRN_GSRR1) SHARED_SPRNG_WRAPPER(dar, 64, SPRN_GDEAR) +SHARED_SPRNG_WRAPPER(esr, 64, SPRN_GESR) SHARED_WRAPPER_GET(msr, 64) static inline void kvmppc_set_msr_fast(struct kvm_vcpu *vcpu, u64 val) { diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 8e8b14b..25a7e70 
100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -292,24 +292,6 @@ static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) vcpu->arch.mcsrr1 = srr1; } -static unsigned long get_guest_esr(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_KVM_BOOKE_HV - return mfspr(SPRN_GESR); -#else - return vcpu->arch.shared->esr; -#endif -} - -static void set_guest_esr(struct kvm_vcpu *vcpu, u32 esr) -{ -#ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_GESR, esr); -#else - vcpu->arch.shared->esr = esr; -#endif -} - static unsigned long get_guest_epr(struct kvm_vcpu *vcpu) { #ifdef CONFIG_KVM_BOOKE_HV @@ -427,7 +409,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; if (update_esr == true) - set_guest_esr(vcpu, vcpu->arch.queued_esr); + kvmppc_set_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) kvmppc_set_dar(vcpu, vcpu->arch.queued_dear); if (update_epr == true) { @@ -1298,7 +1280,7 @@ static void get_sregs_base(struct kvm_vcpu *vcpu, sregs->u.e.csrr0 = vcpu->arch.csrr0; sregs->u.e.csrr1 = vcpu->arch.csrr1; sregs->u.e.mcsr = vcpu->arch.mcsr; - sregs->u.e.esr = get_guest_esr(vcpu); + sregs->u.e.esr = kvmppc_get_esr(vcpu); sregs->u.e.dear = kvmppc_get_dar(vcpu); sregs->u.e.tsr = vcpu->arch.tsr; sregs->u.e.tcr = vcpu->arch.tcr; @@ -1316,7 +1298,7 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, vcpu->arch.csrr0 = sregs->u.e.csrr0; vcpu->arch.csrr1 = sregs->u.e.csrr1; vcpu->arch.mcsr = sregs->u.e.mcsr; - set_guest_esr(vcpu, sregs->u.e.esr); + kvmppc_set_esr(vcpu, sregs->u.e.esr); kvmppc_set_dar(vcpu, sregs->u.e.dear); vcpu->arch.vrsave = sregs->u.e.vrsave; kvmppc_set_tcr(vcpu, sregs->u.e.tcr); -- cgit v0.10.2 From c1b8a01bf959e87cff9c9dcaf51cb5c1d60a2c52 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 17 Jul 2014 17:01:39 +0530 Subject: kvm: ppc: booke: Use the shared struct helpers for SPRN_SPRG0-7 Use kvmppc_set_sprg[0-7]() and kvmppc_get_sprg[0-7]() helper functions Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 25a7e70..34562d4 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1227,14 +1227,14 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->srr0 = kvmppc_get_srr0(vcpu); regs->srr1 = kvmppc_get_srr1(vcpu); regs->pid = vcpu->arch.pid; - regs->sprg0 = vcpu->arch.shared->sprg0; - regs->sprg1 = vcpu->arch.shared->sprg1; - regs->sprg2 = vcpu->arch.shared->sprg2; - regs->sprg3 = vcpu->arch.shared->sprg3; - regs->sprg4 = vcpu->arch.shared->sprg4; - regs->sprg5 = vcpu->arch.shared->sprg5; - regs->sprg6 = vcpu->arch.shared->sprg6; - regs->sprg7 = vcpu->arch.shared->sprg7; + regs->sprg0 = kvmppc_get_sprg0(vcpu); + regs->sprg1 = kvmppc_get_sprg1(vcpu); + regs->sprg2 = kvmppc_get_sprg2(vcpu); + regs->sprg3 = kvmppc_get_sprg3(vcpu); + regs->sprg4 = kvmppc_get_sprg4(vcpu); + regs->sprg5 = kvmppc_get_sprg5(vcpu); + regs->sprg6 = kvmppc_get_sprg6(vcpu); + regs->sprg7 = kvmppc_get_sprg7(vcpu); for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) regs->gpr[i] = kvmppc_get_gpr(vcpu, i); @@ -1255,14 +1255,14 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) kvmppc_set_srr0(vcpu, regs->srr0); kvmppc_set_srr1(vcpu, regs->srr1); kvmppc_set_pid(vcpu, regs->pid); - vcpu->arch.shared->sprg0 = regs->sprg0; - vcpu->arch.shared->sprg1 = regs->sprg1; - vcpu->arch.shared->sprg2 = regs->sprg2; - vcpu->arch.shared->sprg3 = regs->sprg3; - 
vcpu->arch.shared->sprg4 = regs->sprg4; - vcpu->arch.shared->sprg5 = regs->sprg5; - vcpu->arch.shared->sprg6 = regs->sprg6; - vcpu->arch.shared->sprg7 = regs->sprg7; + kvmppc_set_sprg0(vcpu, regs->sprg0); + kvmppc_set_sprg1(vcpu, regs->sprg1); + kvmppc_set_sprg2(vcpu, regs->sprg2); + kvmppc_set_sprg3(vcpu, regs->sprg3); + kvmppc_set_sprg4(vcpu, regs->sprg4); + kvmppc_set_sprg5(vcpu, regs->sprg5); + kvmppc_set_sprg6(vcpu, regs->sprg6); + kvmppc_set_sprg7(vcpu, regs->sprg7); for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) kvmppc_set_gpr(vcpu, i, regs->gpr[i]); diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 27a4b28..28c1588 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -165,16 +165,16 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) * guest (PR-mode only). */ case SPRN_SPRG4: - vcpu->arch.shared->sprg4 = spr_val; + kvmppc_set_sprg4(vcpu, spr_val); break; case SPRN_SPRG5: - vcpu->arch.shared->sprg5 = spr_val; + kvmppc_set_sprg5(vcpu, spr_val); break; case SPRN_SPRG6: - vcpu->arch.shared->sprg6 = spr_val; + kvmppc_set_sprg6(vcpu, spr_val); break; case SPRN_SPRG7: - vcpu->arch.shared->sprg7 = spr_val; + kvmppc_set_sprg7(vcpu, spr_val); break; case SPRN_IVPR: -- cgit v0.10.2 From 34f754b99e2f642c661967b456764b2c7ccc096e Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 17 Jul 2014 17:01:40 +0530 Subject: kvm: ppc: Add SPRN_EPR get helper function kvmppc_set_epr() is already defined in asm/kvm_ppc.h, so rename and move the get_epr helper function to the same file. Signed-off-by: Bharat Bhushan [agraf: remove duplicate return] Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c95bdbd..246fb9a7 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -392,6 +392,17 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) { return 0; } #endif +static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_KVM_BOOKE_HV + return mfspr(SPRN_GEPR); +#elif defined(CONFIG_BOOKE) + return vcpu->arch.epr; +#else + return 0; +#endif +} + static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) { #ifdef CONFIG_KVM_BOOKE_HV diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 34562d4..a06ef6b 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -292,15 +292,6 @@ static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) vcpu->arch.mcsrr1 = srr1; } -static unsigned long get_guest_epr(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_KVM_BOOKE_HV - return mfspr(SPRN_GEPR); -#else - return vcpu->arch.epr; -#endif -} - /* Deliver the interrupt of the corresponding priority, if possible. */ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) @@ -1452,7 +1443,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2); break; case KVM_REG_PPC_EPR: { - u32 epr = get_guest_epr(vcpu); + u32 epr = kvmppc_get_epr(vcpu); val = get_reg_val(reg->id, epr); break; } -- cgit v0.10.2 From b5741bb3d4dc751a76f6caaaec8e2b271ff8ca48 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 23 Jul 2014 19:06:18 +0300 Subject: KVM: PPC: e500mc: Revert "add load inst fixup" The commit 1d628af7 "add load inst fixup" made an attempt to handle failures generated by reading the guest's current instruction. 
The fixup code that was added works by chance, hiding the real issue. The load external pid (lwepx) instruction, used by KVM to read guest instructions, is executed in a substituted guest translation context (EPLC[EGS] = 1). As a consequence, lwepx's TLB error and data storage interrupts need to be handled by KVM, even though these interrupts are generated from host context (MSR[GS] = 0) where lwepx is executed. Currently, KVM hooks only interrupts generated from guest context (MSR[GS] = 1), doing minimal checks on the fast path to avoid host performance degradation. As a result, the host kernel handles lwepx faults searching the faulting guest data address (loaded in DEAR) in its own Logical Partition ID (LPID) 0 context. In case a host translation is found, execution returns to the lwepx instruction instead of the fixup, and the host ends up in an infinite loop. Revert the commit "add load inst fixup". The lwepx issue will be addressed in a subsequent patch without needing fixup code. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index a1712b8..6ff4480 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -29,7 +29,6 @@ #include #include #include -#include #ifdef CONFIG_64BIT #include @@ -164,32 +163,9 @@ PPC_STL r30, VCPU_GPR(R30)(r4) PPC_STL r31, VCPU_GPR(R31)(r4) mtspr SPRN_EPLC, r8 - - /* disable preemption, so we are sure we hit the fixup handler */ - CURRENT_THREAD_INFO(r8, r1) - li r7, 1 - stw r7, TI_PREEMPT(r8) - isync - - /* - * In case the read goes wrong, we catch it and write an invalid value - * in LAST_INST instead. - */ -1: lwepx r9, 0, r5 -2: -.section .fixup, "ax" -3: li r9, KVM_INST_FETCH_FAILED - b 2b -.previous -.section __ex_table,"a" - PPC_LONG_ALIGN - PPC_LONG 1b,3b -.previous - + lwepx r9, 0, r5 mtspr SPRN_EPLC, r3 - li r7, 0 - stw r7, TI_PREEMPT(r8) stw r9, VCPU_LAST_INST(r4) .endif -- cgit v0.10.2 From 9c0d4e0dcf69b1ab3a9f8debebd119f53964cb57 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 23 Jul 2014 19:06:19 +0300 Subject: KVM: PPC: Book3e: Add TLBSEL/TSIZE defines for MAS0/1 Add missing defines MAS0_GET_TLBSEL() and MAS1_GET_TSIZE() for Book3E. 
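For illustration only (not part of the patch): the new extractors are simply the inverse of the existing MAS0_TLBSEL()/MAS1_TSIZE() encoders, and a hypothetical caller that has just issued a tlbsx could decode the result as sketched below. The "+ 10" page-size-shift conversion matches the later use of MAS1_GET_TSIZE() in e500_mmu_host.c in this series.

	/* Hedged sketch only; decode MAS0/MAS1 after a TLB search. */
	u32 mas0 = mfspr(SPRN_MAS0);
	u32 mas1 = mfspr(SPRN_MAS1);
	unsigned int tlbsel = MAS0_GET_TLBSEL(mas0);		/* which TLB array the entry lives in */
	unsigned int psize_shift = MAS1_GET_TSIZE(mas1) + 10;	/* page size is 1 << psize_shift bytes */
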
Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index 8d24f78..cd4f04a 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -40,9 +40,11 @@ /* MAS registers bit definitions */ -#define MAS0_TLBSEL_MASK 0x30000000 -#define MAS0_TLBSEL_SHIFT 28 -#define MAS0_TLBSEL(x) (((x) << MAS0_TLBSEL_SHIFT) & MAS0_TLBSEL_MASK) +#define MAS0_TLBSEL_MASK 0x30000000 +#define MAS0_TLBSEL_SHIFT 28 +#define MAS0_TLBSEL(x) (((x) << MAS0_TLBSEL_SHIFT) & MAS0_TLBSEL_MASK) +#define MAS0_GET_TLBSEL(mas0) (((mas0) & MAS0_TLBSEL_MASK) >> \ + MAS0_TLBSEL_SHIFT) #define MAS0_ESEL_MASK 0x0FFF0000 #define MAS0_ESEL_SHIFT 16 #define MAS0_ESEL(x) (((x) << MAS0_ESEL_SHIFT) & MAS0_ESEL_MASK) @@ -60,6 +62,7 @@ #define MAS1_TSIZE_MASK 0x00000f80 #define MAS1_TSIZE_SHIFT 7 #define MAS1_TSIZE(x) (((x) << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK) +#define MAS1_GET_TSIZE(mas1) (((mas1) & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT) #define MAS2_EPN (~0xFFFUL) #define MAS2_X0 0x00000040 -- cgit v0.10.2 From 9a26af64d6bba72c9dfd62cc0cab0e79f8a66d7b Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 23 Jul 2014 19:06:20 +0300 Subject: KVM: PPC: Book3s: Remove kvmppc_read_inst() function In the context of replacing kvmppc_ld() function calls with a version of kvmppc_get_last_inst() which is allowed to fail, Alex Graf suggested this: "If we get EMULATE_AGAIN, we just have to make sure we go back into the guest. No need to inject an ISI into the guest - it'll do that all by itself. With an error returning kvmppc_get_last_inst we can just completely get rid of kvmppc_read_inst() and only use kvmppc_get_last_inst() instead." As an intermediate step, get rid of kvmppc_read_inst() and only use kvmppc_ld() instead. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e40765f..e76aec3 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -710,42 +710,6 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac) #endif } -static int kvmppc_read_inst(struct kvm_vcpu *vcpu) -{ - ulong srr0 = kvmppc_get_pc(vcpu); - u32 last_inst = kvmppc_get_last_inst(vcpu); - int ret; - - ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); - if (ret == -ENOENT) { - ulong msr = kvmppc_get_msr(vcpu); - - msr = kvmppc_set_field(msr, 33, 33, 1); - msr = kvmppc_set_field(msr, 34, 36, 0); - msr = kvmppc_set_field(msr, 42, 47, 0); - kvmppc_set_msr_fast(vcpu, msr); - kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); - return EMULATE_AGAIN; - } - - return EMULATE_DONE; -} - -static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr) -{ - - /* Need to do paired single emulation? 
*/ - if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) - return EMULATE_DONE; - - /* Read out the instruction */ - if (kvmppc_read_inst(vcpu) == EMULATE_DONE) - /* Need to emulate */ - return EMULATE_FAIL; - - return EMULATE_AGAIN; -} - /* Handle external providers (FPU, Altivec, VSX) */ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, ulong msr) @@ -1149,31 +1113,49 @@ program_interrupt: case BOOK3S_INTERRUPT_VSX: { int ext_msr = 0; + int emul; + ulong pc; + u32 last_inst; + + if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) { + /* Do paired single instruction emulation */ + pc = kvmppc_get_pc(vcpu); + last_inst = kvmppc_get_last_inst(vcpu); + emul = kvmppc_ld(vcpu, &pc, sizeof(u32), &last_inst, + false); + if (emul == EMULATE_DONE) + goto program_interrupt; + else + r = RESUME_GUEST; - switch (exit_nr) { - case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break; - case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break; - case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break; + break; } - switch (kvmppc_check_ext(vcpu, exit_nr)) { - case EMULATE_DONE: - /* everything ok - let's enable the ext */ - r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); + /* Enable external provider */ + switch (exit_nr) { + case BOOK3S_INTERRUPT_FP_UNAVAIL: + ext_msr = MSR_FP; break; - case EMULATE_FAIL: - /* we need to emulate this instruction */ - goto program_interrupt; + + case BOOK3S_INTERRUPT_ALTIVEC: + ext_msr = MSR_VEC; break; - default: - /* nothing to worry about - go again */ + + case BOOK3S_INTERRUPT_VSX: + ext_msr = MSR_VSX; break; } + + r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); break; } case BOOK3S_INTERRUPT_ALIGNMENT: - if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { - u32 last_inst = kvmppc_get_last_inst(vcpu); + { + ulong pc = kvmppc_get_pc(vcpu); + u32 last_inst = kvmppc_get_last_inst(vcpu); + int emul = kvmppc_ld(vcpu, &pc, sizeof(u32), &last_inst, false); + + if (emul == EMULATE_DONE) { u32 dsisr; u64 dar; @@ -1187,6 +1169,7 @@ program_interrupt: } r = RESUME_GUEST; break; + } #ifdef CONFIG_PPC_BOOK3S_64 case BOOK3S_INTERRUPT_FAC_UNAVAIL: kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56); -- cgit v0.10.2 From 51f047261e717b74b226f837a16455994b61ae30 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 23 Jul 2014 19:06:21 +0300 Subject: KVM: PPC: Allow kvmppc_get_last_inst() to fail On book3e, the guest's last instruction is read on the exit path using the dedicated load external pid (lwepx) instruction. This load operation may fail due to TLB eviction and execute-but-not-read entries. This patch lays down the path for an alternative solution to read the guest's last instruction, by allowing the kvmppc_get_last_inst() function to fail. Architecture-specific implementations of kvmppc_load_last_inst() may read the last guest instruction and instruct the emulation layer to re-execute the guest in case of failure. Make the kvmppc_get_last_inst() definition common between architectures. 
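As a sketch of the resulting calling convention (this mirrors the hunks below rather than introducing new API), every caller checks the return value and re-enters the guest when the fetch could not complete:

	u32 last_inst;
	int emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
	if (emul != EMULATE_DONE) {
		/* Fetch failed; resume the guest so it can retry the instruction. */
		r = RESUME_GUEST;
		break;
	}
	/* last_inst now holds the byteswap-corrected guest instruction. */

The PAPR system-call path passes INST_SC instead, which makes the common loader fetch from pc - 4, because the sc instruction sets SRR0 to the following instruction.
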
Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 20fb6f2..a86ca65 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -276,32 +276,6 @@ static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu) return (kvmppc_get_msr(vcpu) & MSR_LE) != (MSR_KERNEL & MSR_LE); } -static inline u32 kvmppc_get_last_inst_internal(struct kvm_vcpu *vcpu, ulong pc) -{ - /* Load the instruction manually if it failed to do so in the - * exit path */ - if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) - kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false); - - return kvmppc_need_byteswap(vcpu) ? swab32(vcpu->arch.last_inst) : - vcpu->arch.last_inst; -} - -static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) -{ - return kvmppc_get_last_inst_internal(vcpu, kvmppc_get_pc(vcpu)); -} - -/* - * Like kvmppc_get_last_inst(), but for fetching a sc instruction. - * Because the sc instruction sets SRR0 to point to the following - * instruction, we have to fetch from pc - 4. - */ -static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu) -{ - return kvmppc_get_last_inst_internal(vcpu, kvmppc_get_pc(vcpu) - 4); -} - static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) { return vcpu->arch.fault_dar; diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index c7aed61..cbb1990 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -69,11 +69,6 @@ static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu) return false; } -static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.last_inst; -} - static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) { vcpu->arch.ctr = val; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 246fb9a7..e381363 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -47,6 +47,11 @@ enum emulation_result { EMULATE_EXIT_USER, /* emulation requires exit to user-space */ }; +enum instruction_type { + INST_GENERIC, + INST_SC, /* system call */ +}; + extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern void kvmppc_handler_highmem(void); @@ -62,6 +67,9 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, u64 val, unsigned int bytes, int is_default_endian); +extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, + enum instruction_type type, u32 *inst); + extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); @@ -234,6 +242,29 @@ struct kvmppc_ops { extern struct kvmppc_ops *kvmppc_hv_ops; extern struct kvmppc_ops *kvmppc_pr_ops; +static inline int kvmppc_get_last_inst(struct kvm_vcpu *vcpu, + enum instruction_type type, u32 *inst) +{ + int ret = EMULATE_DONE; + u32 fetched_inst; + + /* Load the instruction manually if it failed to do so in the + * exit path */ + if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) + ret = kvmppc_load_last_inst(vcpu, type, &vcpu->arch.last_inst); + + /* Write fetch_failed unswapped if the fetch failed */ + if (ret == EMULATE_DONE) + fetched_inst = kvmppc_need_byteswap(vcpu) ? 
+ swab32(vcpu->arch.last_inst) : + vcpu->arch.last_inst; + else + fetched_inst = vcpu->arch.last_inst; + + *inst = fetched_inst; + return ret; +} + static inline bool is_kvmppc_hv_enabled(struct kvm *kvm) { return kvm->arch.kvm_ops == kvmppc_hv_ops; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 31facfc..37ca8a0 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -488,6 +488,23 @@ mmio: } EXPORT_SYMBOL_GPL(kvmppc_ld); +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, + u32 *inst) +{ + ulong pc = kvmppc_get_pc(vcpu); + int r; + + if (type == INST_SC) + pc -= 4; + + r = kvmppc_ld(vcpu, &pc, sizeof(u32), inst, false); + if (r == EMULATE_DONE) + return r; + else + return EMULATE_AGAIN; +} +EXPORT_SYMBOL_GPL(kvmppc_load_last_inst); + int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { return 0; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 2d154d9..fa944a3 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -530,21 +530,14 @@ static int instruction_is_store(unsigned int instr) static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long gpa, gva_t ea, int is_store) { - int ret; u32 last_inst; - unsigned long srr0 = kvmppc_get_pc(vcpu); - /* We try to load the last instruction. We don't let - * emulate_instruction do it as it doesn't check what - * kvmppc_ld returns. + /* * If we fail, we just return to the guest and try executing it again. */ - if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) { - ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); - if (ret != EMULATE_DONE || last_inst == KVM_INST_FETCH_FAILED) - return RESUME_GUEST; - vcpu->arch.last_inst = last_inst; - } + if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) != + EMULATE_DONE) + return RESUME_GUEST; /* * WARNING: We do not know for sure whether the instruction we just @@ -558,7 +551,7 @@ static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, * we just return and retry the instruction. */ - if (instruction_is_store(kvmppc_get_last_inst(vcpu)) != !!is_store) + if (instruction_is_store(last_inst) != !!is_store) return RESUME_GUEST; /* diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index 6c8011f..bfb8035 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -639,26 +639,36 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { - u32 inst = kvmppc_get_last_inst(vcpu); + u32 inst; enum emulation_result emulated = EMULATE_DONE; + int ax_rd, ax_ra, ax_rb, ax_rc; + short full_d; + u64 *fpr_d, *fpr_a, *fpr_b, *fpr_c; - int ax_rd = inst_get_field(inst, 6, 10); - int ax_ra = inst_get_field(inst, 11, 15); - int ax_rb = inst_get_field(inst, 16, 20); - int ax_rc = inst_get_field(inst, 21, 25); - short full_d = inst_get_field(inst, 16, 31); - - u64 *fpr_d = &VCPU_FPR(vcpu, ax_rd); - u64 *fpr_a = &VCPU_FPR(vcpu, ax_ra); - u64 *fpr_b = &VCPU_FPR(vcpu, ax_rb); - u64 *fpr_c = &VCPU_FPR(vcpu, ax_rc); - - bool rcomp = (inst & 1) ? 
true : false; - u32 cr = kvmppc_get_cr(vcpu); + bool rcomp; + u32 cr; #ifdef DEBUG int i; #endif + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst); + if (emulated != EMULATE_DONE) + return emulated; + + ax_rd = inst_get_field(inst, 6, 10); + ax_ra = inst_get_field(inst, 11, 15); + ax_rb = inst_get_field(inst, 16, 20); + ax_rc = inst_get_field(inst, 21, 25); + full_d = inst_get_field(inst, 16, 31); + + fpr_d = &VCPU_FPR(vcpu, ax_rd); + fpr_a = &VCPU_FPR(vcpu, ax_ra); + fpr_b = &VCPU_FPR(vcpu, ax_rb); + fpr_c = &VCPU_FPR(vcpu, ax_rc); + + rcomp = (inst & 1) ? true : false; + cr = kvmppc_get_cr(vcpu); + if (!kvmppc_inst_is_paired_single(vcpu, inst)) return EMULATE_FAIL; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e76aec3..b18f2d4 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1018,15 +1018,24 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, { enum emulation_result er; ulong flags; + u32 last_inst; + int emul; program_interrupt: flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; + emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); + if (emul != EMULATE_DONE) { + r = RESUME_GUEST; + break; + } + if (kvmppc_get_msr(vcpu) & MSR_PR) { #ifdef EXIT_DEBUG - printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); + pr_info("Userspace triggered 0x700 exception at\n 0x%lx (0x%x)\n", + kvmppc_get_pc(vcpu), last_inst); #endif - if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) != + if ((last_inst & 0xff0007ff) != (INS_DCBZ & 0xfffffff7)) { kvmppc_core_queue_program(vcpu, flags); r = RESUME_GUEST; @@ -1045,7 +1054,7 @@ program_interrupt: break; case EMULATE_FAIL: printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", - __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); + __func__, kvmppc_get_pc(vcpu), last_inst); kvmppc_core_queue_program(vcpu, flags); r = RESUME_GUEST; break; @@ -1062,8 +1071,23 @@ program_interrupt: break; } case BOOK3S_INTERRUPT_SYSCALL: + { + u32 last_sc; + int emul; + + /* Get last sc for papr */ + if (vcpu->arch.papr_enabled) { + /* The sc instruction points SRR0 to the next inst */ + emul = kvmppc_get_last_inst(vcpu, INST_SC, &last_sc); + if (emul != EMULATE_DONE) { + kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) - 4); + r = RESUME_GUEST; + break; + } + } + if (vcpu->arch.papr_enabled && - (kvmppc_get_last_sc(vcpu) == 0x44000022) && + (last_sc == 0x44000022) && !(kvmppc_get_msr(vcpu) & MSR_PR)) { /* SC 1 papr hypercalls */ ulong cmd = kvmppc_get_gpr(vcpu, 3); @@ -1108,21 +1132,19 @@ program_interrupt: r = RESUME_GUEST; } break; + } case BOOK3S_INTERRUPT_FP_UNAVAIL: case BOOK3S_INTERRUPT_ALTIVEC: case BOOK3S_INTERRUPT_VSX: { int ext_msr = 0; int emul; - ulong pc; u32 last_inst; if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) { /* Do paired single instruction emulation */ - pc = kvmppc_get_pc(vcpu); - last_inst = kvmppc_get_last_inst(vcpu); - emul = kvmppc_ld(vcpu, &pc, sizeof(u32), &last_inst, - false); + emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, + &last_inst); if (emul == EMULATE_DONE) goto program_interrupt; else @@ -1151,9 +1173,8 @@ program_interrupt: } case BOOK3S_INTERRUPT_ALIGNMENT: { - ulong pc = kvmppc_get_pc(vcpu); - u32 last_inst = kvmppc_get_last_inst(vcpu); - int emul = kvmppc_ld(vcpu, &pc, sizeof(u32), &last_inst, false); + u32 last_inst; + int emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); + if (emul == EMULATE_DONE) { u32 dsisr; diff --git a/arch/powerpc/kvm/booke.c 
b/arch/powerpc/kvm/booke.c index a06ef6b..50df5e3 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -702,6 +702,9 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) * they were actually modified by emulation. */ return RESUME_GUEST_NV; + case EMULATE_AGAIN: + return RESUME_GUEST; + case EMULATE_DO_DCR: run->exit_reason = KVM_EXIT_DCR; return RESUME_HOST; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 79677d7..4385c14 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -610,6 +610,12 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, } } +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, + u32 *instr) +{ + return EMULATE_AGAIN; +} + /************* MMU Notifiers *************/ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index da86d9b..c5c64b6 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -224,19 +224,25 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) * from opcode tables in the future. */ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) { - u32 inst = kvmppc_get_last_inst(vcpu); - int ra = get_ra(inst); - int rs = get_rs(inst); - int rt = get_rt(inst); - int sprn = get_sprn(inst); - enum emulation_result emulated = EMULATE_DONE; + u32 inst; + int ra, rs, rt, sprn; + enum emulation_result emulated; int advance = 1; /* this default type might be overwritten by subcategories */ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); + emulated = kvmppc_get_last_inst(vcpu, false, &inst); + if (emulated != EMULATE_DONE) + return emulated; + pr_debug("Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); + ra = get_ra(inst); + rs = get_rs(inst); + rt = get_rt(inst); + sprn = get_sprn(inst); + switch (get_op(inst)) { case OP_TRAP: #ifdef CONFIG_PPC_BOOK3S diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index fe0257a..cfa6cfa 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -280,6 +280,9 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) * actually modified. */ r = RESUME_GUEST_NV; break; + case EMULATE_AGAIN: + r = RESUME_GUEST; + break; case EMULATE_DO_MMIO: run->exit_reason = KVM_EXIT_MMIO; /* We must reload nonvolatiles because "update" load/store @@ -289,11 +292,15 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) r = RESUME_HOST_NV; break; case EMULATE_FAIL: + { + u32 last_inst; + + kvmppc_get_last_inst(vcpu, false, &last_inst); /* XXX Deliver Program interrupt to guest. */ - printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__, - kvmppc_get_last_inst(vcpu)); + pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst); r = RESUME_HOST; break; + } default: WARN_ON(1); r = RESUME_GUEST; -- cgit v0.10.2 From f5250471b2d6ad27d536cb34ce39d76b91b2b36b Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 23 Jul 2014 19:06:22 +0300 Subject: KVM: PPC: Bookehv: Get vcpu's last instruction for emulation On book3e, KVM uses the dedicated load external pid (lwepx) instruction to read the guest's last instruction on the exit path. lwepx exceptions (DTLB_MISS, DSI and LRAT), generated by loading a guest address, need to be handled by KVM. These exceptions are generated in a substituted guest translation context (EPLC[EGS] = 1) from host context (MSR[GS] = 0). 
Currently, KVM hooks only interrupts generated from guest context (MSR[GS] = 1), doing minimal checks on the fast path to avoid host performance degradation. lwepx exceptions originate from host state (MSR[GS] = 0), which implies additional checks in the DO_KVM macro (besides the current MSR[GS] = 1) by looking at the Exception Syndrome Register (ESR[EPID]) and the External PID Load Context Register (EPLC[EGS]). Doing this on each Data TLB miss exception is obviously too intrusive for the host. Instead, read the guest's last instruction in kvmppc_load_last_inst() by searching for the physical address and kmapping it. This addresses the TODO for TLB eviction and execute-but-not-read entries, and allows us to get rid of lwepx until we are able to handle failures. A simple stress benchmark shows a 1% sys performance degradation compared with the previous approach (lwepx without failure handling): time for i in `seq 1 10000`; do /bin/echo > /dev/null; done real 0m 8.85s user 0m 4.34s sys 0m 4.48s vs real 0m 8.84s user 0m 4.36s sys 0m 4.44s A solution that uses lwepx and handles its exceptions in KVM would be to temporarily hijack the interrupt vector from the host. This imposes additional synchronizations for cores like FSL e6500 that share host IVOR registers between hardware threads. This optimized solution can later be developed on top of this patch. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 50df5e3..97bcde2 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -819,6 +819,28 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu, } } +static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu, + enum emulation_result emulated, u32 last_inst) +{ + switch (emulated) { + case EMULATE_AGAIN: + return RESUME_GUEST; + + case EMULATE_FAIL: + pr_debug("%s: load instruction from guest address %lx failed\n", + __func__, vcpu->arch.pc); + /* For debugging, encode the failing instruction and + * report it to userspace. 
*/ + run->hw.hardware_exit_reason = ~0ULL << 32; + run->hw.hardware_exit_reason |= last_inst; + kvmppc_core_queue_program(vcpu, ESR_PIL); + return RESUME_HOST; + + default: + BUG(); + } +} + /** * kvmppc_handle_exit * @@ -830,6 +852,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, int r = RESUME_HOST; int s; int idx; + u32 last_inst = KVM_INST_FETCH_FAILED; + enum emulation_result emulated = EMULATE_DONE; /* update before a new last_exit_type is rewritten */ kvmppc_update_timing_stats(vcpu); @@ -837,6 +861,20 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* restart interrupts if they were meant for the host */ kvmppc_restart_interrupt(vcpu, exit_nr); + /* + * get last instruction before being preempted + * TODO: for e6500 check also BOOKE_INTERRUPT_LRAT_ERROR & ESR_DATA + */ + switch (exit_nr) { + case BOOKE_INTERRUPT_DATA_STORAGE: + case BOOKE_INTERRUPT_DTLB_MISS: + case BOOKE_INTERRUPT_HV_PRIV: + emulated = kvmppc_get_last_inst(vcpu, false, &last_inst); + break; + default: + break; + } + local_irq_enable(); trace_kvm_exit(exit_nr, vcpu); @@ -845,6 +883,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; + if (emulated != EMULATE_DONE) { + r = kvmppc_resume_inst_load(run, vcpu, emulated, last_inst); + goto out; + } + switch (exit_nr) { case BOOKE_INTERRUPT_MACHINE_CHECK: printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR)); @@ -1134,6 +1177,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, BUG(); } +out: /* * To avoid clobbering exit_reason, only check for signals if we * aren't already exiting to userspace for some other reason. diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index 6ff4480..e000b39 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -121,38 +121,14 @@ 1: .if \flags & NEED_EMU - /* - * This assumes you have external PID support. - * To support a bookehv CPU without external PID, you'll - * need to look up the TLB entry and create a temporary mapping. - * - * FIXME: we don't currently handle if the lwepx faults. PR-mode - * booke doesn't handle it either. Since Linux doesn't use - * broadcast tlbivax anymore, the only way this should happen is - * if the guest maps its memory execute-but-not-read, or if we - * somehow take a TLB miss in the middle of this entry code and - * evict the relevant entry. On e500mc, all kernel lowmem is - * bolted into TLB1 large page mappings, and we don't use - * broadcast invalidates, so we should not take a TLB miss here. - * - * Later we'll need to deal with faults here. Disallowing guest - * mappings that are execute-but-not-read could be an option on - * e500mc, but not on chips with an LRAT if it is used. 
- */ - - mfspr r3, SPRN_EPLC /* will already have correct ELPID and EGS */ PPC_STL r15, VCPU_GPR(R15)(r4) PPC_STL r16, VCPU_GPR(R16)(r4) PPC_STL r17, VCPU_GPR(R17)(r4) PPC_STL r18, VCPU_GPR(R18)(r4) PPC_STL r19, VCPU_GPR(R19)(r4) - mr r8, r3 PPC_STL r20, VCPU_GPR(R20)(r4) - rlwimi r8, r6, EPC_EAS_SHIFT - MSR_IR_LG, EPC_EAS PPC_STL r21, VCPU_GPR(R21)(r4) - rlwimi r8, r6, EPC_EPR_SHIFT - MSR_PR_LG, EPC_EPR PPC_STL r22, VCPU_GPR(R22)(r4) - rlwimi r8, r10, EPC_EPID_SHIFT, EPC_EPID PPC_STL r23, VCPU_GPR(R23)(r4) PPC_STL r24, VCPU_GPR(R24)(r4) PPC_STL r25, VCPU_GPR(R25)(r4) @@ -162,10 +138,15 @@ PPC_STL r29, VCPU_GPR(R29)(r4) PPC_STL r30, VCPU_GPR(R30)(r4) PPC_STL r31, VCPU_GPR(R31)(r4) - mtspr SPRN_EPLC, r8 - isync - lwepx r9, 0, r5 - mtspr SPRN_EPLC, r3 + + /* + * We don't use external PID support. lwepx faults would need to be + * handled by KVM and this implies additional code in DO_KVM (for + * DTLB_MISS, DSI and LRAT) to check ESR[EPID] and EPLC[EGS] which + * is too intrusive for the host. Get last instruction in + * kvmppc_get_last_inst(). + */ + li r9, KVM_INST_FETCH_FAILED stw r9, VCPU_LAST_INST(r4) .endif diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 4385c14..4150826 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -610,11 +610,103 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, } } +#ifdef CONFIG_KVM_BOOKE_HV +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, + u32 *instr) +{ + gva_t geaddr; + hpa_t addr; + hfn_t pfn; + hva_t eaddr; + u32 mas1, mas2, mas3; + u64 mas7_mas3; + struct page *page; + unsigned int addr_space, psize_shift; + bool pr; + unsigned long flags; + + /* Search TLB for guest pc to get the real address */ + geaddr = kvmppc_get_pc(vcpu); + + addr_space = (vcpu->arch.shared->msr & MSR_IS) >> MSR_IR_LG; + + local_irq_save(flags); + mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | addr_space); + mtspr(SPRN_MAS5, MAS5_SGS | vcpu->kvm->arch.lpid); + asm volatile("tlbsx 0, %[geaddr]\n" : : + [geaddr] "r" (geaddr)); + mtspr(SPRN_MAS5, 0); + mtspr(SPRN_MAS8, 0); + mas1 = mfspr(SPRN_MAS1); + mas2 = mfspr(SPRN_MAS2); + mas3 = mfspr(SPRN_MAS3); +#ifdef CONFIG_64BIT + mas7_mas3 = mfspr(SPRN_MAS7_MAS3); +#else + mas7_mas3 = ((u64)mfspr(SPRN_MAS7) << 32) | mas3; +#endif + local_irq_restore(flags); + + /* + * If the TLB entry for guest pc was evicted, return to the guest. + * There are high chances to find a valid TLB entry next time. + */ + if (!(mas1 & MAS1_VALID)) + return EMULATE_AGAIN; + + /* + * Another thread may rewrite the TLB entry in parallel, don't + * execute from the address if the execute permission is not set + */ + pr = vcpu->arch.shared->msr & MSR_PR; + if (unlikely((pr && !(mas3 & MAS3_UX)) || + (!pr && !(mas3 & MAS3_SX)))) { + pr_err_ratelimited( + "%s: Instruction emulation from guest address %08lx without execute permission\n", + __func__, geaddr); + return EMULATE_AGAIN; + } + + /* + * The real address will be mapped by a cacheable, memory coherent, + * write-back page. Check for mismatches when LRAT is used. 
+ */ + if (has_feature(vcpu, VCPU_FTR_MMU_V2) && + unlikely((mas2 & MAS2_I) || (mas2 & MAS2_W) || !(mas2 & MAS2_M))) { + pr_err_ratelimited( + "%s: Instruction emulation from guest address %08lx mismatches storage attributes\n", + __func__, geaddr); + return EMULATE_AGAIN; + } + + /* Get pfn */ + psize_shift = MAS1_GET_TSIZE(mas1) + 10; + addr = (mas7_mas3 & (~0ULL << psize_shift)) | + (geaddr & ((1ULL << psize_shift) - 1ULL)); + pfn = addr >> PAGE_SHIFT; + + /* Guard against emulation from devices area */ + if (unlikely(!page_is_ram(pfn))) { + pr_err_ratelimited("%s: Instruction emulation from non-RAM host address %08llx is not supported\n", + __func__, addr); + return EMULATE_AGAIN; + } + + /* Map a page and get guest's instruction */ + page = pfn_to_page(pfn); + eaddr = (unsigned long)kmap_atomic(page); + *instr = *(u32 *)(eaddr | (unsigned long)(addr & ~PAGE_MASK)); + kunmap_atomic((u32 *)eaddr); + + return EMULATE_DONE; +} +#else int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, u32 *instr) { return EMULATE_AGAIN; } +#endif /************* MMU Notifiers *************/ -- cgit v0.10.2 From 99e99d19a86dc596703ed79dcecf9ca6b32a6a8a Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Mon, 21 Jul 2014 11:23:26 +0530 Subject: kvm: ppc: bookehv: Save restore SPRN_SPRG9 on guest entry exit SPRN_SPRG9 is used by the debug interrupt handler, so this is required for debug support. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 855ba4d..562f685 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -587,6 +587,7 @@ struct kvm_vcpu_arch { u32 mmucfg; u32 eptcfg; u32 epr; + u64 sprg9; u32 pwrmgtcr0; u32 crit_save; /* guest debug registers*/ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 17ffcb4..ab9ae04 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -668,6 +668,7 @@ int main(void) DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); + DEFINE(VCPU_SPRG9, offsetof(struct kvm_vcpu, arch.sprg9)); DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index e000b39..b4f8fba 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -398,6 +398,7 @@ _GLOBAL(kvmppc_resume_host) #ifdef CONFIG_64BIT PPC_LL r3, PACA_SPRG_VDSO(r13) #endif + mfspr r5, SPRN_SPRG9 PPC_STD(r6, VCPU_SHARED_SPRG4, r11) mfspr r8, SPRN_SPRG6 PPC_STD(r7, VCPU_SHARED_SPRG5, r11) @@ -405,6 +406,7 @@ _GLOBAL(kvmppc_resume_host) #ifdef CONFIG_64BIT mtspr SPRN_SPRG_VDSO_WRITE, r3 #endif + PPC_STD(r5, VCPU_SPRG9, r4) PPC_STD(r8, VCPU_SHARED_SPRG6, r11) mfxer r3 PPC_STD(r9, VCPU_SHARED_SPRG7, r11) @@ -639,7 +641,9 @@ lightweight_exit: mtspr SPRN_SPRG5W, r6 PPC_LD(r8, VCPU_SHARED_SPRG7, r11) mtspr SPRN_SPRG6W, r7 + PPC_LD(r5, VCPU_SPRG9, r4) mtspr SPRN_SPRG7W, r8 + mtspr SPRN_SPRG9, r5 /* Load some guest volatiles. 
*/ PPC_LL r3, VCPU_LR(r4) -- cgit v0.10.2 From 28d2f421bcff6781b80decac59da414f86f35c2a Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Fri, 25 Jul 2014 11:21:08 +0530 Subject: KVM: PPC: Booke-hv: Add one reg interface for SPRG9 We now support SPRG9 for the guest, so also add a one-reg interface for it. Note: changes are in bookehv code only, as we do not have SPRG9 on booke-pr. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 2bc4a94..0e56d9e 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -555,6 +555,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8) #define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9) +#define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 690499d..164bad2 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -267,14 +267,32 @@ static int kvmppc_core_set_sregs_e500mc(struct kvm_vcpu *vcpu, static int kvmppc_get_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); + int r = 0; + + switch (id) { + case KVM_REG_PPC_SPRG9: + *val = get_reg_val(id, vcpu->arch.sprg9); + break; + default: + r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); + } + return r; } static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); + int r = 0; + + switch (id) { + case KVM_REG_PPC_SPRG9: + vcpu->arch.sprg9 = set_reg_val(id, *val); + break; + default: + r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); + } + return r; } -- cgit v0.10.2 From 8c95ead6039d46d2d5c375d3cadac8708121fbe4 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Fri, 25 Jul 2014 11:32:43 +0530 Subject: KVM: PPC: Remove comment saying SPRG1 is used for vcpu pointer Scott Wood pointed out that we are no longer using SPRG1 for the vcpu pointer, but using SPRN_SPRG_THREAD <=> SPRG3 (thread->vcpu). So this comment is not valid now. Note: SPRN_SPRG3R is not supported (we do not see any need for it as of now), and if we want to support it in the future then we will have to shift to using SPRG1 for the VCPU pointer. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c8f3381..0ef17ad 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -944,9 +944,6 @@ * readable variant for reads, which can avoid a fault * with KVM type virtualization. * - * (*) Under KVM, the host SPRG1 is used to point to - * the current VCPU data structure - * * 32-bit 8xx: * - SPRG0 scratch for exception vectors * - SPRG1 scratch for exception vectors -- cgit v0.10.2 From b2677b8dd8de0dc1496ede4da09b9dfd59f15cea Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 25 Jul 2014 10:38:59 +0200 Subject: KVM: PPC: Remove 440 support The 440 target hasn't been functioning properly for a few releases, and recently I was the only one fixing a very serious bug there, which indicates to me that nobody else has been using it. Furthermore, KVM on 440 is slow to the point of being unusable. We don't have to carry along completely unused code. Remove 440 and give us one less thing to worry about. 
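(As a hypothetical usage sketch, returning to the SPRG9 one-reg interface added earlier in this series: userspace can reach the new register through the generic one-reg ioctl roughly as below. vcpu_fd is an assumed vcpu file descriptor, not something these patches define; the snippet needs <linux/kvm.h>, <sys/ioctl.h>, <stdint.h> and <stdio.h>.)

	struct kvm_one_reg reg;
	u64 sprg9;

	reg.id = KVM_REG_PPC_SPRG9;		/* id defined in uapi/asm/kvm.h above */
	reg.addr = (uintptr_t)&sprg9;		/* kernel copies the register value here */
	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)	/* vcpu_fd is hypothetical */
		perror("KVM_GET_ONE_REG");
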
Signed-off-by: Alexander Graf diff --git a/Documentation/powerpc/00-INDEX b/Documentation/powerpc/00-INDEX index 6db73df..a68784d 100644 --- a/Documentation/powerpc/00-INDEX +++ b/Documentation/powerpc/00-INDEX @@ -17,8 +17,6 @@ firmware-assisted-dump.txt - Documentation on the firmware assisted dump mechanism "fadump". hvcs.txt - IBM "Hypervisor Virtual Console Server" Installation Guide -kvm_440.txt - - Various notes on the implementation of KVM for PowerPC 440. mpc52xx.txt - Linux 2.6.x on MPC52xx family pmu-ebb.txt diff --git a/Documentation/powerpc/kvm_440.txt b/Documentation/powerpc/kvm_440.txt deleted file mode 100644 index c02a003..0000000 --- a/Documentation/powerpc/kvm_440.txt +++ /dev/null @@ -1,41 +0,0 @@ -Hollis Blanchard -15 Apr 2008 - -Various notes on the implementation of KVM for PowerPC 440: - -To enforce isolation, host userspace, guest kernel, and guest userspace all -run at user privilege level. Only the host kernel runs in supervisor mode. -Executing privileged instructions in the guest traps into KVM (in the host -kernel), where we decode and emulate them. Through this technique, unmodified -440 Linux kernels can be run (slowly) as guests. Future performance work will -focus on reducing the overhead and frequency of these traps. - -The usual code flow is started from userspace invoking an "run" ioctl, which -causes KVM to switch into guest context. We use IVPR to hijack the host -interrupt vectors while running the guest, which allows us to direct all -interrupts to kvmppc_handle_interrupt(). At this point, we could either -- handle the interrupt completely (e.g. emulate "mtspr SPRG0"), or -- let the host interrupt handler run (e.g. when the decrementer fires), or -- return to host userspace (e.g. when the guest performs device MMIO) - -Address spaces: We take advantage of the fact that Linux doesn't use the AS=1 -address space (in host or guest), which gives us virtual address space to use -for guest mappings. While the guest is running, the host kernel remains mapped -in AS=0, but the guest can only use AS=1 mappings. - -TLB entries: The TLB entries covering the host linear mapping remain -present while running the guest. This reduces the overhead of lightweight -exits, which are handled by KVM running in the host kernel. We keep three -copies of the TLB: - - guest TLB: contents of the TLB as the guest sees it - - shadow TLB: the TLB that is actually in hardware while guest is running - - host TLB: to restore TLB state when context switching guest -> host -When a TLB miss occurs because a mapping was not present in the shadow TLB, -but was present in the guest TLB, KVM handles the fault without invoking the -guest. Large guest pages are backed by multiple 4KB shadow pages through this -mechanism. - -IO: MMIO and DCR accesses are emulated by userspace. We use virtio for network -and block IO, so those drivers must be enabled in the guest. It's possible -that some qemu device emulation (e.g. e1000 or rtl8139) may also work with -little effort. diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 790352f..93500f6 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -202,9 +202,7 @@ config PPC_EARLY_DEBUG_BEAT config PPC_EARLY_DEBUG_44x bool "Early serial debugging for IBM/AMCC 44x CPUs" - # PPC_EARLY_DEBUG on 440 leaves AS=1 mappings above the TLB high water - # mark, which doesn't work with current 440 KVM. 
- depends on 44x && !KVM + depends on 44x help Select this to enable early debugging for IBM 44x chips via the inbuilt serial port. If you enable this, ensure you set diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig index ccf66b9..924e10d 100644 --- a/arch/powerpc/configs/ppc44x_defconfig +++ b/arch/powerpc/configs/ppc44x_defconfig @@ -127,4 +127,3 @@ CONFIG_CRYPTO_PCBC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set CONFIG_VIRTUALIZATION=y -CONFIG_KVM_440=y diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h deleted file mode 100644 index a0e5761..0000000 --- a/arch/powerpc/include/asm/kvm_44x.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard - */ - -#ifndef __ASM_44X_H__ -#define __ASM_44X_H__ - -#include - -#define PPC44x_TLB_SIZE 64 - -/* If the guest is expecting it, this can be as large as we like; we'd just - * need to find some way of advertising it. */ -#define KVM44x_GUEST_TLB_SIZE 64 - -struct kvmppc_44x_tlbe { - u32 tid; /* Only the low 8 bits are used. */ - u32 word0; - u32 word1; - u32 word2; -}; - -struct kvmppc_44x_shadow_ref { - struct page *page; - u16 gtlb_index; - u8 writeable; - u8 tid; -}; - -struct kvmppc_vcpu_44x { - /* Unmodified copy of the guest's TLB. */ - struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE]; - - /* References to guest pages in the hardware TLB. */ - struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE]; - - /* State of the shadow TLB at guest context switch time. */ - struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE]; - u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; - - struct kvm_vcpu vcpu; -}; - -static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu) -{ - return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); -} - -void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); -void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); - -#endif /* __ASM_44X_H__ */ diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 3f3e530..b8901c4 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -33,7 +33,6 @@ /* IVPR must be 64KiB-aligned. 
*/ #define VCPU_SIZE_ORDER 4 #define VCPU_SIZE_LOG (VCPU_SIZE_ORDER + 12) -#define VCPU_TLB_PGSZ PPC44x_TLB_64K #define VCPU_SIZE_BYTES (1< #define KVM_ARCH_WANT_MMU_NOTIFIER @@ -62,8 +61,6 @@ extern int kvm_age_hva(struct kvm *kvm, unsigned long hva); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); -#endif - #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 #define HPTEG_HASH_BITS_PTE_LONG 12 diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c deleted file mode 100644 index 9cb4b0a..0000000 --- a/arch/powerpc/kvm/44x.c +++ /dev/null @@ -1,237 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "44x_tlb.h" -#include "booke.h" - -static void kvmppc_core_vcpu_load_44x(struct kvm_vcpu *vcpu, int cpu) -{ - kvmppc_booke_vcpu_load(vcpu, cpu); - kvmppc_44x_tlb_load(vcpu); -} - -static void kvmppc_core_vcpu_put_44x(struct kvm_vcpu *vcpu) -{ - kvmppc_44x_tlb_put(vcpu); - kvmppc_booke_vcpu_put(vcpu); -} - -int kvmppc_core_check_processor_compat(void) -{ - int r; - - if (strncmp(cur_cpu_spec->platform, "ppc440", 6) == 0) - r = 0; - else - r = -ENOTSUPP; - - return r; -} - -int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0]; - int i; - - tlbe->tid = 0; - tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; - tlbe->word1 = 0; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; - - tlbe++; - tlbe->tid = 0; - tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; - tlbe->word1 = 0xef600000; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR - | PPC44x_TLB_I | PPC44x_TLB_G; - - /* Since the guest can directly access the timebase, it must know the - * real timebase frequency. Accordingly, it must see the state of - * CCR1[TCS]. */ - /* XXX CCR1 doesn't exist on all 440 SoCs. */ - vcpu->arch.ccr1 = mfspr(SPRN_CCR1); - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) - vcpu_44x->shadow_refs[i].gtlb_index = -1; - - vcpu->arch.cpu_type = KVM_CPU_440; - vcpu->arch.pvr = mfspr(SPRN_PVR); - - return 0; -} - -/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ -int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, - struct kvm_translation *tr) -{ - int index; - gva_t eaddr; - u8 pid; - u8 as; - - eaddr = tr->linear_address; - pid = (tr->linear_address >> 32) & 0xff; - as = (tr->linear_address >> 40) & 0x1; - - index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); - if (index == -1) { - tr->valid = 0; - return 0; - } - - tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr); - /* XXX what does "writeable" and "usermode" even mean? 
*/ - tr->valid = 1; - - return 0; -} - -static int kvmppc_core_get_sregs_44x(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) -{ - return kvmppc_get_sregs_ivor(vcpu, sregs); -} - -static int kvmppc_core_set_sregs_44x(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) -{ - return kvmppc_set_sregs_ivor(vcpu, sregs); -} - -static int kvmppc_get_one_reg_44x(struct kvm_vcpu *vcpu, u64 id, - union kvmppc_one_reg *val) -{ - return -EINVAL; -} - -static int kvmppc_set_one_reg_44x(struct kvm_vcpu *vcpu, u64 id, - union kvmppc_one_reg *val) -{ - return -EINVAL; -} - -static struct kvm_vcpu *kvmppc_core_vcpu_create_44x(struct kvm *kvm, - unsigned int id) -{ - struct kvmppc_vcpu_44x *vcpu_44x; - struct kvm_vcpu *vcpu; - int err; - - vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu_44x) { - err = -ENOMEM; - goto out; - } - - vcpu = &vcpu_44x->vcpu; - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_vcpu; - - vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); - if (!vcpu->arch.shared) - goto uninit_vcpu; - - return vcpu; - -uninit_vcpu: - kvm_vcpu_uninit(vcpu); -free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu_44x); -out: - return ERR_PTR(err); -} - -static void kvmppc_core_vcpu_free_44x(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - - free_page((unsigned long)vcpu->arch.shared); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu_44x); -} - -static int kvmppc_core_init_vm_44x(struct kvm *kvm) -{ - return 0; -} - -static void kvmppc_core_destroy_vm_44x(struct kvm *kvm) -{ -} - -static struct kvmppc_ops kvm_ops_44x = { - .get_sregs = kvmppc_core_get_sregs_44x, - .set_sregs = kvmppc_core_set_sregs_44x, - .get_one_reg = kvmppc_get_one_reg_44x, - .set_one_reg = kvmppc_set_one_reg_44x, - .vcpu_load = kvmppc_core_vcpu_load_44x, - .vcpu_put = kvmppc_core_vcpu_put_44x, - .vcpu_create = kvmppc_core_vcpu_create_44x, - .vcpu_free = kvmppc_core_vcpu_free_44x, - .mmu_destroy = kvmppc_mmu_destroy_44x, - .init_vm = kvmppc_core_init_vm_44x, - .destroy_vm = kvmppc_core_destroy_vm_44x, - .emulate_op = kvmppc_core_emulate_op_44x, - .emulate_mtspr = kvmppc_core_emulate_mtspr_44x, - .emulate_mfspr = kvmppc_core_emulate_mfspr_44x, -}; - -static int __init kvmppc_44x_init(void) -{ - int r; - - r = kvmppc_booke_init(); - if (r) - goto err_out; - - r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); - if (r) - goto err_out; - kvm_ops_44x.owner = THIS_MODULE; - kvmppc_pr_ops = &kvm_ops_44x; - -err_out: - return r; -} - -static void __exit kvmppc_44x_exit(void) -{ - kvmppc_pr_ops = NULL; - kvmppc_booke_exit(); -} - -module_init(kvmppc_44x_init); -module_exit(kvmppc_44x_exit); -MODULE_ALIAS_MISCDEV(KVM_MINOR); -MODULE_ALIAS("devname:kvm"); diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c deleted file mode 100644 index 92c9ab4..0000000 --- a/arch/powerpc/kvm/44x_emulate.c +++ /dev/null @@ -1,194 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard - */ - -#include -#include -#include -#include -#include -#include "timing.h" - -#include "booke.h" -#include "44x_tlb.h" - -#define XOP_MFDCRX 259 -#define XOP_MFDCR 323 -#define XOP_MTDCRX 387 -#define XOP_MTDCR 451 -#define XOP_TLBSX 914 -#define XOP_ICCCI 966 -#define XOP_TLBWE 978 - -static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn) -{ - /* emulate some access in kernel */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs); - return EMULATE_DONE; - default: - vcpu->run->dcr.dcrn = dcrn; - vcpu->run->dcr.data = kvmppc_get_gpr(vcpu, rs); - vcpu->run->dcr.is_write = 1; - vcpu->arch.dcr_is_write = 1; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - return EMULATE_DO_DCR; - } -} - -static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn) -{ - /* The guest may access CPR0 registers to determine the timebase - * frequency, and it must know the real host frequency because it - * can directly access the timebase registers. - * - * It would be possible to emulate those accesses in userspace, - * but userspace can really only figure out the end frequency. - * We could decompose that into the factors that compute it, but - * that's tricky math, and it's easier to just report the real - * CPR0 values. - */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr); - break; - case DCRN_CPR0_CONFIG_DATA: - local_irq_disable(); - mtdcr(DCRN_CPR0_CONFIG_ADDR, - vcpu->arch.cpr0_cfgaddr); - kvmppc_set_gpr(vcpu, rt, - mfdcr(DCRN_CPR0_CONFIG_DATA)); - local_irq_enable(); - break; - default: - vcpu->run->dcr.dcrn = dcrn; - vcpu->run->dcr.data = 0; - vcpu->run->dcr.is_write = 0; - vcpu->arch.dcr_is_write = 0; - vcpu->arch.io_gpr = rt; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - return EMULATE_DO_DCR; - } - - return EMULATE_DONE; -} - -int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int inst, int *advance) -{ - int emulated = EMULATE_DONE; - int dcrn = get_dcrn(inst); - int ra = get_ra(inst); - int rb = get_rb(inst); - int rc = get_rc(inst); - int rs = get_rs(inst); - int rt = get_rt(inst); - int ws = get_ws(inst); - - switch (get_op(inst)) { - case 31: - switch (get_xop(inst)) { - - case XOP_MFDCR: - emulated = emulate_mfdcr(vcpu, rt, dcrn); - break; - - case XOP_MFDCRX: - emulated = emulate_mfdcr(vcpu, rt, - kvmppc_get_gpr(vcpu, ra)); - break; - - case XOP_MTDCR: - emulated = emulate_mtdcr(vcpu, rs, dcrn); - break; - - case XOP_MTDCRX: - emulated = emulate_mtdcr(vcpu, rs, - kvmppc_get_gpr(vcpu, ra)); - break; - - case XOP_TLBWE: - emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws); - break; - - case XOP_TLBSX: - emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc); - break; - - case XOP_ICCCI: - break; - - default: - emulated = EMULATE_FAIL; - } - - break; - - default: - emulated = EMULATE_FAIL; - } - - if (emulated == EMULATE_FAIL) - emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance); - - return emulated; -} - -int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) -{ - int emulated = EMULATE_DONE; - - switch (sprn) { - case SPRN_PID: - kvmppc_set_pid(vcpu, spr_val); break; - case 
SPRN_MMUCR: - vcpu->arch.mmucr = spr_val; break; - case SPRN_CCR0: - vcpu->arch.ccr0 = spr_val; break; - case SPRN_CCR1: - vcpu->arch.ccr1 = spr_val; break; - default: - emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val); - } - - return emulated; -} - -int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) -{ - int emulated = EMULATE_DONE; - - switch (sprn) { - case SPRN_PID: - *spr_val = vcpu->arch.pid; break; - case SPRN_MMUCR: - *spr_val = vcpu->arch.mmucr; break; - case SPRN_CCR0: - *spr_val = vcpu->arch.ccr0; break; - case SPRN_CCR1: - *spr_val = vcpu->arch.ccr1; break; - default: - emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val); - } - - return emulated; -} - diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c deleted file mode 100644 index 0deef10..0000000 --- a/arch/powerpc/kvm/44x_tlb.c +++ /dev/null @@ -1,528 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2007 - * - * Authors: Hollis Blanchard - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include "timing.h" - -#include "44x_tlb.h" -#include "trace.h" - -#ifndef PPC44x_TLBE_SIZE -#define PPC44x_TLBE_SIZE PPC44x_TLB_4K -#endif - -#define PAGE_SIZE_4K (1<<12) -#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1)) - -#define PPC44x_TLB_UATTR_MASK \ - (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3) -#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW) -#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW) - -#ifdef DEBUG -void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *tlbe; - int i; - - printk("vcpu %d TLB dump:\n", vcpu->vcpu_id); - printk("| %2s | %3s | %8s | %8s | %8s |\n", - "nr", "tid", "word0", "word1", "word2"); - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { - tlbe = &vcpu_44x->guest_tlb[i]; - if (tlbe->word0 & PPC44x_TLB_VALID) - printk(" G%2d | %02X | %08X | %08X | %08X |\n", - i, tlbe->tid, tlbe->word0, tlbe->word1, - tlbe->word2); - } -} -#endif - -static inline void kvmppc_44x_tlbie(unsigned int index) -{ - /* 0 <= index < 64, so the V bit is clear and we can use the index as - * word0. */ - asm volatile( - "tlbwe %[index], %[index], 0\n" - : - : [index] "r"(index) - ); -} - -static inline void kvmppc_44x_tlbre(unsigned int index, - struct kvmppc_44x_tlbe *tlbe) -{ - asm volatile( - "tlbre %[word0], %[index], 0\n" - "mfspr %[tid], %[sprn_mmucr]\n" - "andi. 
%[tid], %[tid], 0xff\n" - "tlbre %[word1], %[index], 1\n" - "tlbre %[word2], %[index], 2\n" - : [word0] "=r"(tlbe->word0), - [word1] "=r"(tlbe->word1), - [word2] "=r"(tlbe->word2), - [tid] "=r"(tlbe->tid) - : [index] "r"(index), - [sprn_mmucr] "i"(SPRN_MMUCR) - : "cc" - ); -} - -static inline void kvmppc_44x_tlbwe(unsigned int index, - struct kvmppc_44x_tlbe *stlbe) -{ - unsigned long tmp; - - asm volatile( - "mfspr %[tmp], %[sprn_mmucr]\n" - "rlwimi %[tmp], %[tid], 0, 0xff\n" - "mtspr %[sprn_mmucr], %[tmp]\n" - "tlbwe %[word0], %[index], 0\n" - "tlbwe %[word1], %[index], 1\n" - "tlbwe %[word2], %[index], 2\n" - : [tmp] "=&r"(tmp) - : [word0] "r"(stlbe->word0), - [word1] "r"(stlbe->word1), - [word2] "r"(stlbe->word2), - [tid] "r"(stlbe->tid), - [index] "r"(index), - [sprn_mmucr] "i"(SPRN_MMUCR) - ); -} - -static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) -{ - /* We only care about the guest's permission and user bits. */ - attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK; - - if (!usermode) { - /* Guest is in supervisor mode, so we need to translate guest - * supervisor permissions into user permissions. */ - attrib &= ~PPC44x_TLB_USER_PERM_MASK; - attrib |= (attrib & PPC44x_TLB_SUPER_PERM_MASK) << 3; - } - - /* Make sure host can always access this memory. */ - attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW; - - /* WIMGE = 0b00100 */ - attrib |= PPC44x_TLB_M; - - return attrib; -} - -/* Load shadow TLB back into hardware. */ -void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i <= tlb_44x_hwater; i++) { - struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; - - if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) - kvmppc_44x_tlbwe(i, stlbe); - } -} - -static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x, - unsigned int i) -{ - vcpu_44x->shadow_tlb_mod[i] = 1; -} - -/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */ -void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i <= tlb_44x_hwater; i++) { - struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; - - if (vcpu_44x->shadow_tlb_mod[i]) - kvmppc_44x_tlbre(i, stlbe); - - if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) - kvmppc_44x_tlbie(i); - } -} - - -/* Search the guest TLB for a matching entry. */ -int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, - unsigned int as) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - /* XXX Replace loop with fancy data structures. 
*/ - for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { - struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i]; - unsigned int tid; - - if (eaddr < get_tlb_eaddr(tlbe)) - continue; - - if (eaddr > get_tlb_end(tlbe)) - continue; - - tid = get_tlb_tid(tlbe); - if (tid && (tid != pid)) - continue; - - if (!get_tlb_v(tlbe)) - continue; - - if (get_tlb_ts(tlbe) != as) - continue; - - return i; - } - - return -1; -} - -gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, - gva_t eaddr) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; - unsigned int pgmask = get_tlb_bytes(gtlbe) - 1; - - return get_tlb_raddr(gtlbe) | (eaddr & pgmask); -} - -int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) -{ - unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); - - return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); -} - -int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) -{ - unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); - - return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); -} - -void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu) -{ -} - -void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu) -{ -} - -static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x, - unsigned int stlb_index) -{ - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index]; - - if (!ref->page) - return; - - /* Discard from the TLB. */ - /* Note: we could actually invalidate a host mapping, if the host overwrote - * this TLB entry since we inserted a guest mapping. */ - kvmppc_44x_tlbie(stlb_index); - - /* Now release the page. */ - if (ref->writeable) - kvm_release_page_dirty(ref->page); - else - kvm_release_page_clean(ref->page); - - ref->page = NULL; - - /* XXX set tlb_44x_index to stlb_index? */ - - trace_kvm_stlb_inval(stlb_index); -} - -void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i <= tlb_44x_hwater; i++) - kvmppc_44x_shadow_release(vcpu_44x, i); -} - -/** - * kvmppc_mmu_map -- create a host mapping for guest memory - * - * If the guest wanted a larger page than the host supports, only the first - * host page is mapped here and the rest are demand faulted. - * - * If the guest wanted a smaller page than the host page size, we map only the - * guest-size page (i.e. not a full host page mapping). - * - * Caller must ensure that the specified guest TLB entry is safe to insert into - * the shadow TLB. - */ -void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, - unsigned int gtlb_index) -{ - struct kvmppc_44x_tlbe stlbe; - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; - struct kvmppc_44x_shadow_ref *ref; - struct page *new_page; - hpa_t hpaddr; - gfn_t gfn; - u32 asid = gtlbe->tid; - u32 flags = gtlbe->word2; - u32 max_bytes = get_tlb_bytes(gtlbe); - unsigned int victim; - - /* Select TLB entry to clobber. Indirectly guard against races with the TLB - * miss handler by disabling interrupts. */ - local_irq_disable(); - victim = ++tlb_44x_index; - if (victim > tlb_44x_hwater) - victim = 0; - tlb_44x_index = victim; - local_irq_enable(); - - /* Get reference to new page. 
*/ - gfn = gpaddr >> PAGE_SHIFT; - new_page = gfn_to_page(vcpu->kvm, gfn); - if (is_error_page(new_page)) { - printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n", - (unsigned long long)gfn); - return; - } - hpaddr = page_to_phys(new_page); - - /* Invalidate any previous shadow mappings. */ - kvmppc_44x_shadow_release(vcpu_44x, victim); - - /* XXX Make sure (va, size) doesn't overlap any other - * entries. 440x6 user manual says the result would be - * "undefined." */ - - /* XXX what about AS? */ - - /* Force TS=1 for all guest mappings. */ - stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS; - - if (max_bytes >= PAGE_SIZE) { - /* Guest mapping is larger than or equal to host page size. We can use - * a "native" host mapping. */ - stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE; - } else { - /* Guest mapping is smaller than host page size. We must restrict the - * size of the mapping to be at most the smaller of the two, but for - * simplicity we fall back to a 4K mapping (this is probably what the - * guest is using anyways). */ - stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K; - - /* 'hpaddr' is a host page, which is larger than the mapping we're - * inserting here. To compensate, we must add the in-page offset to the - * sub-page. */ - hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K); - } - - stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); - stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags, - vcpu->arch.shared->msr & MSR_PR); - stlbe.tid = !(asid & 0xff); - - /* Keep track of the reference so we can properly release it later. */ - ref = &vcpu_44x->shadow_refs[victim]; - ref->page = new_page; - ref->gtlb_index = gtlb_index; - ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW); - ref->tid = stlbe.tid; - - /* Insert shadow mapping into hardware TLB. */ - kvmppc_44x_tlbe_set_modified(vcpu_44x, victim); - kvmppc_44x_tlbwe(victim, &stlbe); - trace_kvm_stlb_write(victim, stlbe.tid, stlbe.word0, stlbe.word1, - stlbe.word2); -} - -/* For a particular guest TLB entry, invalidate the corresponding host TLB - * mappings and release the host pages. */ -static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu, - unsigned int gtlb_index) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; - if (ref->gtlb_index == gtlb_index) - kvmppc_44x_shadow_release(vcpu_44x, i); - } -} - -void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) -{ - int usermode = vcpu->arch.shared->msr & MSR_PR; - - vcpu->arch.shadow_pid = !usermode; -} - -void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - if (unlikely(vcpu->arch.pid == new_pid)) - return; - - vcpu->arch.pid = new_pid; - - /* Guest userspace runs with TID=0 mappings and PID=0, to make sure it - * can't access guest kernel mappings (TID=1). When we switch to a new - * guest PID, which will also use host PID=0, we must discard the old guest - * userspace mappings. */ - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; - - if (ref->tid == 0) - kvmppc_44x_shadow_release(vcpu_44x, i); - } -} - -static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, - const struct kvmppc_44x_tlbe *tlbe) -{ - gpa_t gpa; - - if (!get_tlb_v(tlbe)) - return 0; - - /* Does it match current guest AS? */ - /* XXX what about IS != DS? 
*/ - if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS)) - return 0; - - gpa = get_tlb_raddr(tlbe); - if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) - /* Mapping is not for RAM. */ - return 0; - - return 1; -} - -int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *tlbe; - unsigned int gtlb_index; - int idx; - - gtlb_index = kvmppc_get_gpr(vcpu, ra); - if (gtlb_index >= KVM44x_GUEST_TLB_SIZE) { - printk("%s: index %d\n", __func__, gtlb_index); - kvmppc_dump_vcpu(vcpu); - return EMULATE_FAIL; - } - - tlbe = &vcpu_44x->guest_tlb[gtlb_index]; - - /* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */ - if (tlbe->word0 & PPC44x_TLB_VALID) - kvmppc_44x_invalidate(vcpu, gtlb_index); - - switch (ws) { - case PPC44x_TLB_PAGEID: - tlbe->tid = get_mmucr_stid(vcpu); - tlbe->word0 = kvmppc_get_gpr(vcpu, rs); - break; - - case PPC44x_TLB_XLAT: - tlbe->word1 = kvmppc_get_gpr(vcpu, rs); - break; - - case PPC44x_TLB_ATTRIB: - tlbe->word2 = kvmppc_get_gpr(vcpu, rs); - break; - - default: - return EMULATE_FAIL; - } - - idx = srcu_read_lock(&vcpu->kvm->srcu); - - if (tlbe_is_host_safe(vcpu, tlbe)) { - gva_t eaddr; - gpa_t gpaddr; - u32 bytes; - - eaddr = get_tlb_eaddr(tlbe); - gpaddr = get_tlb_raddr(tlbe); - - /* Use the advertised page size to mask effective and real addrs. */ - bytes = get_tlb_bytes(tlbe); - eaddr &= ~(bytes - 1); - gpaddr &= ~(bytes - 1); - - kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index); - } - - srcu_read_unlock(&vcpu->kvm->srcu, idx); - - trace_kvm_gtlb_write(gtlb_index, tlbe->tid, tlbe->word0, tlbe->word1, - tlbe->word2); - - kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); - return EMULATE_DONE; -} - -int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc) -{ - u32 ea; - int gtlb_index; - unsigned int as = get_mmucr_sts(vcpu); - unsigned int pid = get_mmucr_stid(vcpu); - - ea = kvmppc_get_gpr(vcpu, rb); - if (ra) - ea += kvmppc_get_gpr(vcpu, ra); - - gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); - if (rc) { - u32 cr = kvmppc_get_cr(vcpu); - - if (gtlb_index < 0) - kvmppc_set_cr(vcpu, cr & ~0x20000000); - else - kvmppc_set_cr(vcpu, cr | 0x20000000); - } - kvmppc_set_gpr(vcpu, rt, gtlb_index); - - kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); - return EMULATE_DONE; -} diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h deleted file mode 100644 index a9ff80e..0000000 --- a/arch/powerpc/kvm/44x_tlb.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 
2007 - * - * Authors: Hollis Blanchard - */ - -#ifndef __KVM_POWERPC_TLB_H__ -#define __KVM_POWERPC_TLB_H__ - -#include -#include - -extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, - unsigned int pid, unsigned int as); - -extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, - u8 rc); -extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws); - -/* TLB helper functions */ -static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe) -{ - return (tlbe->word0 >> 4) & 0xf; -} - -static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe) -{ - return tlbe->word0 & 0xfffffc00; -} - -static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe) -{ - unsigned int pgsize = get_tlb_size(tlbe); - return 1 << 10 << (pgsize << 1); -} - -static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe) -{ - return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1; -} - -static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe) -{ - u64 word1 = tlbe->word1; - return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00); -} - -static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe) -{ - return tlbe->tid & 0xff; -} - -static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe) -{ - return (tlbe->word0 >> 8) & 0x1; -} - -static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe) -{ - return (tlbe->word0 >> 9) & 0x1; -} - -static inline unsigned int get_mmucr_stid(const struct kvm_vcpu *vcpu) -{ - return vcpu->arch.mmucr & 0xff; -} - -static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu) -{ - return (vcpu->arch.mmucr >> 16) & 0x1; -} - -#endif /* __KVM_POWERPC_TLB_H__ */ diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 8aeeda1..8f104a6 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -112,23 +112,9 @@ config KVM_BOOK3S_64_PR config KVM_BOOKE_HV bool -config KVM_440 - bool "KVM support for PowerPC 440 processors" - depends on 44x - select KVM - select KVM_MMIO - ---help--- - Support running unmodified 440 guest kernels in virtual machines on - 440 host processors. - - This module provides access to the hardware capabilities through - a character device node named /dev/kvm. - - If unsure, say N. - config KVM_EXIT_TIMING bool "Detailed exit timing" - depends on KVM_440 || KVM_E500V2 || KVM_E500MC + depends on KVM_E500V2 || KVM_E500MC ---help--- Calculate elapsed time for every exit/enter cycle. A per-vcpu report is available in debugfs kvm/vm#_vcpu#_timing. diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index ce569b6..777f894 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -10,7 +10,6 @@ KVM := ../../../virt/kvm common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ $(KVM)/eventfd.o -CFLAGS_44x_tlb.o := -I. CFLAGS_e500_mmu.o := -I. CFLAGS_e500_mmu_host.o := -I. CFLAGS_emulate.o := -I. 
@@ -21,16 +20,6 @@ obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o AFLAGS_booke_interrupts.o := -I$(obj) -kvm-440-objs := \ - $(common-objs-y) \ - booke.o \ - booke_emulate.o \ - booke_interrupts.o \ - 44x.o \ - 44x_tlb.o \ - 44x_emulate.o -kvm-objs-$(CONFIG_KVM_440) := $(kvm-440-objs) - kvm-e500-objs := \ $(common-objs-y) \ booke.o \ @@ -127,7 +116,6 @@ kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o kvm-objs := $(kvm-objs-m) $(kvm-objs-y) -obj-$(CONFIG_KVM_440) += kvm.o obj-$(CONFIG_KVM_E500V2) += kvm.o obj-$(CONFIG_KVM_E500MC) += kvm.o obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index b632cd3..f753543 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -99,13 +99,6 @@ enum int_class { void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type); -extern void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu); -extern int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int inst, int *advance); -extern int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, - ulong spr_val); -extern int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, - ulong *spr_val); extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu); extern int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 2c6deb5ef..84c308a 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -21,7 +21,6 @@ #include #include #include -#include #include #include @@ -424,10 +423,6 @@ lightweight_exit: mtspr SPRN_PID1, r3 #endif -#ifdef CONFIG_44x - iccci 0, 0 /* XXX hack */ -#endif - /* Load some guest volatiles. */ lwz r0, VCPU_GPR(R0)(r4) lwz r2, VCPU_GPR(R2)(r4) diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index b4f8fba..e9fa56a 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index cfa6cfa..8e03568 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -217,7 +217,6 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) case KVM_HCALL_TOKEN(KVM_HC_FEATURES): r = EV_SUCCESS; #if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2) - /* XXX Missing magic page on 44x */ r2 |= (1 << KVM_FEATURE_MAGIC_PAGE); #endif -- cgit v0.10.2 From a0840240c0c6bcbac8f0f5db11f95c19aaf9b52f Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Sat, 19 Jul 2014 17:59:34 +1000 Subject: KVM: PPC: Book3S: Fix LPCR one_reg interface Unfortunately, the LPCR got defined as a 32-bit register in the one_reg interface. This is unfortunate because KVM allows userspace to control the DPFD (default prefetch depth) field, which is in the upper 32 bits. The result is that DPFD always get set to 0, which reduces performance in the guest. We can't just change KVM_REG_PPC_LPCR to be a 64-bit register ID, since that would break existing userspace binaries. Instead we define a new KVM_REG_PPC_LPCR_64 id which is 64-bit. Userspace can still use the old KVM_REG_PPC_LPCR id, but it now only modifies those fields in the bottom 32 bits that userspace can modify (ILE, TC and AIL). If userspace uses the new KVM_REG_PPC_LPCR_64 id, it can modify DPFD as well. 
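For illustration, the new id is reached through the usual one_reg interface; a minimal userspace sketch (the helper name and the vcpu fd handling are illustrative, not part of this patch):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Set a full 64-bit LPCR value, including DPFD in the upper half.
	 * Assumes vcpu_fd is an open KVM vcpu file descriptor and that the
	 * headers carry the KVM_REG_PPC_LPCR_64 id added by this patch. */
	static int set_lpcr64(int vcpu_fd, uint64_t lpcr)
	{
		struct kvm_one_reg reg = {
			.id   = KVM_REG_PPC_LPCR_64,
			.addr = (uint64_t)&lpcr,
		};

		return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
	}

The old 32-bit KVM_REG_PPC_LPCR id keeps working, but as described above it can only touch ILE, TC and AIL.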
Signed-off-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras Cc: stable@vger.kernel.org Signed-off-by: Alexander Graf diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 6955318..884f819 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1869,7 +1869,8 @@ registers, find a list below: PPC | KVM_REG_PPC_PID | 64 PPC | KVM_REG_PPC_ACOP | 64 PPC | KVM_REG_PPC_VRSAVE | 32 - PPC | KVM_REG_PPC_LPCR | 64 + PPC | KVM_REG_PPC_LPCR | 32 + PPC | KVM_REG_PPC_LPCR_64 | 64 PPC | KVM_REG_PPC_PPR | 64 PPC | KVM_REG_PPC_ARCH_COMPAT 32 PPC | KVM_REG_PPC_DABRX | 32 diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 0e56d9e..e0e49db 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -548,6 +548,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_VRSAVE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4) #define KVM_REG_PPC_LPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5) +#define KVM_REG_PPC_LPCR_64 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb5) #define KVM_REG_PPC_PPR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb6) /* Architecture compatibility level */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index f1281c4..0c5266e 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -863,7 +863,8 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, return 0; } -static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr) +static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, + bool preserve_top32) { struct kvmppc_vcore *vc = vcpu->arch.vcore; u64 mask; @@ -898,6 +899,10 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr) mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; if (cpu_has_feature(CPU_FTR_ARCH_207S)) mask |= LPCR_AIL; + + /* Broken 32-bit version of LPCR must not clear top bits */ + if (preserve_top32) + mask &= 0xFFFFFFFF; vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); spin_unlock(&vc->lock); } @@ -1011,6 +1016,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.vcore->tb_offset); break; case KVM_REG_PPC_LPCR: + case KVM_REG_PPC_LPCR_64: *val = get_reg_val(id, vcpu->arch.vcore->lpcr); break; case KVM_REG_PPC_PPR: @@ -1216,7 +1222,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, ALIGN(set_reg_val(id, *val), 1UL << 24); break; case KVM_REG_PPC_LPCR: - kvmppc_set_lpcr(vcpu, set_reg_val(id, *val)); + kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true); + break; + case KVM_REG_PPC_LPCR_64: + kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), false); break; case KVM_REG_PPC_PPR: vcpu->arch.ppr = set_reg_val(id, *val); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index b18f2d4..e7a1fa2 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1314,6 +1314,7 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, to_book3s(vcpu)->hior); break; case KVM_REG_PPC_LPCR: + case KVM_REG_PPC_LPCR_64: /* * We are only interested in the LPCR_ILE bit */ @@ -1349,6 +1350,7 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, to_book3s(vcpu)->hior_explicit = true; break; case KVM_REG_PPC_LPCR: + case KVM_REG_PPC_LPCR_64: kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val)); break; default: -- cgit v0.10.2 From ef1af2e29622ff3403926ae801a2b10da075a2de Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 19 Jul 2014 17:59:35 +1000 Subject: 
KVM: PPC: Book3S PR: Take SRCU read lock around RTAS kvm_read_guest() call This does for PR KVM what c9438092cae4 ("KVM: PPC: Book3S HV: Take SRCU read lock around kvm_read_guest() call") did for HV KVM, that is, eliminate a "suspicious rcu_dereference_check() usage!" warning by taking the SRCU lock around the call to kvmppc_rtas_hcall(). It also fixes a return of RESUME_HOST to return EMULATE_FAIL instead, since kvmppc_h_pr() is supposed to return EMULATE_* values. Signed-off-by: Paul Mackerras Cc: stable@vger.kernel.org Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index 6d0143f..ce3c893 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -267,6 +267,8 @@ static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) { + int rc, idx; + if (cmd <= MAX_HCALL_OPCODE && !test_bit(cmd/4, vcpu->kvm->arch.enabled_hcalls)) return EMULATE_FAIL; @@ -299,8 +301,11 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) break; case H_RTAS: if (list_empty(&vcpu->kvm->arch.rtas_tokens)) - return RESUME_HOST; - if (kvmppc_rtas_hcall(vcpu)) + break; + idx = srcu_read_lock(&vcpu->kvm->srcu); + rc = kvmppc_rtas_hcall(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (rc) break; kvmppc_set_gpr(vcpu, 3, 0); return EMULATE_DONE; -- cgit v0.10.2 From 1b2e33b071b13980a1f0823fbf139242059697b4 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 19 Jul 2014 17:59:36 +1000 Subject: KVM: PPC: Book3S: Make kvmppc_ld return a more accurate error indication At present, kvmppc_ld calls kvmppc_xlate, and if kvmppc_xlate returns any error indication, it returns -ENOENT, which is taken to mean an HPTE not found error. However, the error could have been a segment found (no SLB entry) or a permission error. Similarly, kvmppc_pte_to_hva currently does permission checking, but any error from it is taken by kvmppc_ld to mean that the access is an emulated MMIO access. Also, kvmppc_ld does no execute permission checking. This fixes these problems by (a) returning any error from kvmppc_xlate directly, (b) moving the permission check from kvmppc_pte_to_hva into kvmppc_ld, and (c) adding an execute permission check to kvmppc_ld. This is similar to what was done for kvmppc_st() by commit 82ff911317c3 ("KVM: PPC: Deflect page write faults properly in kvmppc_st"). 
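The locking discipline being applied is the standard SRCU read-side critical section; schematically (a sketch of the pattern, not the exact call site, which appears in the diff below):

	int idx, rc;

	idx = srcu_read_lock(&vcpu->kvm->srcu);
	rc = kvmppc_rtas_hcall(vcpu);	/* may call kvm_read_guest() */
	srcu_read_unlock(&vcpu->kvm->srcu, idx);

kvm_read_guest() dereferences the memslot array under SRCU, so any path that can reach it must hold the SRCU read lock for the duration of the call.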
Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 37ca8a0..a3cbada 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -413,17 +413,10 @@ static hva_t kvmppc_bad_hva(void) return PAGE_OFFSET; } -static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte, - bool read) +static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) { hva_t hpage; - if (read && !pte->may_read) - goto err; - - if (!read && !pte->may_write) - goto err; - hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); if (kvm_is_error_hva(hpage)) goto err; @@ -462,15 +455,23 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, { struct kvmppc_pte pte; hva_t hva = *eaddr; + int rc; vcpu->stat.ld++; - if (kvmppc_xlate(vcpu, *eaddr, data, false, &pte)) - goto nopte; + rc = kvmppc_xlate(vcpu, *eaddr, data, false, &pte); + if (rc) + return rc; *eaddr = pte.raddr; - hva = kvmppc_pte_to_hva(vcpu, &pte, true); + if (!pte.may_read) + return -EPERM; + + if (!data && !pte.may_execute) + return -ENOEXEC; + + hva = kvmppc_pte_to_hva(vcpu, &pte); if (kvm_is_error_hva(hva)) goto mmio; @@ -481,8 +482,6 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, return EMULATE_DONE; -nopte: - return -ENOENT; mmio: return EMULATE_DO_MMIO; } -- cgit v0.10.2 From de9bdd1a604d30b4e05dc18b5cc6354949253abd Mon Sep 17 00:00:00 2001 From: Stewart Smith Date: Fri, 18 Jul 2014 14:18:42 +1000 Subject: Split out struct kvmppc_vcore creation to separate function No code changes, just split it out to a function so that with the addition of micro partition prefetch buffer allocation (in subsequent patch) looks neater and doesn't require excessive indentation. 
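After the split, the call site reduces to the usual allocate-and-check shape (as the diff below shows):

	vcore = kvmppc_vcore_create(kvm, core);
	kvm->arch.vcores[core] = vcore;
	kvm->arch.online_vcores++;

with kvmppc_vcore_create() returning NULL on allocation failure, so the extra allocation added in the next patch can live inside the helper instead of pushing the surrounding code another indentation level deeper.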
Signed-off-by: Stewart Smith Acked-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 0c5266e..5042ccc 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1303,6 +1303,26 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, return r; } +static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) +{ + struct kvmppc_vcore *vcore; + + vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL); + + if (vcore == NULL) + return NULL; + + INIT_LIST_HEAD(&vcore->runnable_threads); + spin_lock_init(&vcore->lock); + init_waitqueue_head(&vcore->wq); + vcore->preempt_tb = TB_NIL; + vcore->lpcr = kvm->arch.lpcr; + vcore->first_vcpuid = core * threads_per_subcore; + vcore->kvm = kvm; + + return vcore; +} + static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, unsigned int id) { @@ -1354,16 +1374,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, mutex_lock(&kvm->lock); vcore = kvm->arch.vcores[core]; if (!vcore) { - vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL); - if (vcore) { - INIT_LIST_HEAD(&vcore->runnable_threads); - spin_lock_init(&vcore->lock); - init_waitqueue_head(&vcore->wq); - vcore->preempt_tb = TB_NIL; - vcore->lpcr = kvm->arch.lpcr; - vcore->first_vcpuid = core * threads_per_subcore; - vcore->kvm = kvm; - } + vcore = kvmppc_vcore_create(kvm, core); kvm->arch.vcores[core] = vcore; kvm->arch.online_vcores++; } -- cgit v0.10.2 From 9678cdaae93932473f696fdea5debf3eee1e1260 Mon Sep 17 00:00:00 2001 From: Stewart Smith Date: Fri, 18 Jul 2014 14:18:43 +1000 Subject: Use the POWER8 Micro Partition Prefetch Engine in KVM HV on POWER8 The POWER8 processor has a Micro Partition Prefetch Engine, which is a fancy way of saying "has way to store and load contents of L2 or L2+MRU way of L3 cache". We initiate the storing of the log (list of addresses) using the logmpp instruction and start restore by writing to a SPR. The logmpp instruction takes parameters in a single 64bit register: - starting address of the table to store log of L2/L2+L3 cache contents - 32kb for L2 - 128kb for L2+L3 - Aligned relative to maximum size of the table (32kb or 128kb) - Log control (no-op, L2 only, L2 and L3, abort logout) We should abort any ongoing logging before initiating one. To initiate restore, we write to the MPPR SPR. The format of what to write to the SPR is similar to the logmpp instruction parameter: - starting address of the table to read from (same alignment requirements) - table size (no data, until end of table) - prefetch rate (from fastest possible to slower. about every 8, 16, 24 or 32 cycles) The idea behind loading and storing the contents of L2/L3 cache is to reduce memory latency in a system that is frequently swapping vcores on a physical CPU. The best case scenario for doing this is when some vcores are doing very cache heavy workloads. The worst case is when they have about 0 cache hits, so we just generate needless memory operations. This implementation just does L2 store/load. In my benchmarks this proves to be useful. Benchmark 1: - 16 core POWER8 - 3x Ubuntu 14.04LTS guests (LE) with 8 VCPUs each - No split core/SMT - two guests running sysbench memory test. 
sysbench --test=memory --num-threads=8 run - one guest running apache bench (of default HTML page) ab -n 490000 -c 400 http://localhost/ This benchmark aims to measure performance of real world application (apache) where other guests are cache hot with their own workloads. The sysbench memory benchmark does pointer sized writes to a (small) memory buffer in a loop. In this benchmark with this patch I can see an improvement both in requests per second (~5%) and in mean and median response times (again, about 5%). The spread of minimum and maximum response times were largely unchanged. benchmark 2: - Same VM config as benchmark 1 - all three guests running sysbench memory benchmark This benchmark aims to see if there is a positive or negative affect to this cache heavy benchmark. Although due to the nature of the benchmark (stores) we may not see a difference in performance, but rather hopefully an improvement in consistency of performance (when vcore switched in, don't have to wait many times for cachelines to be pulled in) The results of this benchmark are improvements in consistency of performance rather than performance itself. With this patch, the few outliers in duration go away and we get more consistent performance in each guest. benchmark 3: - same 3 guests and CPU configuration as benchmark 1 and 2. - two idle guests - 1 guest running STREAM benchmark This scenario also saw performance improvement with this patch. On Copy and Scale workloads from STREAM, I got 5-6% improvement with this patch. For Add and triad, it was around 10% (or more). benchmark 4: - same 3 guests as previous benchmarks - two guests running sysbench --memory, distinctly different cache heavy workload - one guest running STREAM benchmark. Similar improvements to benchmark 3. benchmark 5: - 1 guest, 8 VCPUs, Ubuntu 14.04 - Host configured with split core (SMT8, subcores-per-core=4) - STREAM benchmark In this benchmark, we see a 10-20% performance improvement across the board of STREAM benchmark results with this patch. Based on preliminary investigation and microbenchmarks by Prerna Saxena Signed-off-by: Stewart Smith Acked-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index ed0afc1..34a05a1 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -3,6 +3,7 @@ #ifdef __KERNEL__ +#include /* bytes per L1 cache line */ #if defined(CONFIG_8xx) || defined(CONFIG_403GCX) @@ -39,6 +40,12 @@ struct ppc64_caches { }; extern struct ppc64_caches ppc64_caches; + +static inline void logmpp(u64 x) +{ + asm volatile(PPC_LOGMPP(R1) : : "r" (x)); +} + #endif /* __powerpc64__ && ! 
__ASSEMBLY__ */ #if defined(__ASSEMBLY__) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 5fe2b5d..11385bb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -307,6 +307,8 @@ struct kvmppc_vcore { u32 arch_compat; ulong pcr; ulong dpdes; /* doorbell state (POWER8) */ + void *mpp_buffer; /* Micro Partition Prefetch buffer */ + bool mpp_buffer_is_valid; }; #define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 3132bb9..c636841 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -139,6 +139,7 @@ #define PPC_INST_ISEL 0x7c00001e #define PPC_INST_ISEL_MASK 0xfc00003e #define PPC_INST_LDARX 0x7c0000a8 +#define PPC_INST_LOGMPP 0x7c0007e4 #define PPC_INST_LSWI 0x7c0004aa #define PPC_INST_LSWX 0x7c00042a #define PPC_INST_LWARX 0x7c000028 @@ -275,6 +276,20 @@ #define __PPC_EH(eh) 0 #endif +/* POWER8 Micro Partition Prefetch (MPP) parameters */ +/* Address mask is common for LOGMPP instruction and MPPR SPR */ +#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000 + +/* Bits 60 and 61 of MPP SPR should be set to one of the following */ +/* Aborting the fetch is indeed setting 00 in the table size bits */ +#define PPC_MPPR_FETCH_ABORT (0x0ULL << 60) +#define PPC_MPPR_FETCH_WHOLE_TABLE (0x2ULL << 60) + +/* Bits 54 and 55 of register for LOGMPP instruction should be set to: */ +#define PPC_LOGMPP_LOG_L2 (0x02ULL << 54) +#define PPC_LOGMPP_LOG_L2L3 (0x01ULL << 54) +#define PPC_LOGMPP_LOG_ABORT (0x03ULL << 54) + /* Deal with instructions that older assemblers aren't aware of */ #define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \ __PPC_RA(a) | __PPC_RB(b)) @@ -283,6 +298,8 @@ #define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LDARX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b) | __PPC_EH(eh)) +#define PPC_LOGMPP(b) stringify_in_c(.long PPC_INST_LOGMPP | \ + __PPC_RB(b)) #define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LWARX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b) | __PPC_EH(eh)) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 0ef17ad..c547b26 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -225,6 +225,7 @@ #define CTRL_TE 0x00c00000 /* thread enable */ #define CTRL_RUNLATCH 0x1 #define SPRN_DAWR 0xB4 +#define SPRN_MPPR 0xB8 /* Micro Partition Prefetch Register */ #define SPRN_RPR 0xBA /* Relative Priority Register */ #define SPRN_CIABR 0xBB #define CIABR_PRIV 0x3 diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 5042ccc..c470d55 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -69,6 +70,13 @@ static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); +#if defined(CONFIG_PPC_64K_PAGES) +#define MPP_BUFFER_ORDER 0 +#elif defined(CONFIG_PPC_4K_PAGES) +#define MPP_BUFFER_ORDER 3 +#endif + + static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -1320,6 +1328,13 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) vcore->first_vcpuid = core * threads_per_subcore; vcore->kvm = kvm; + vcore->mpp_buffer_is_valid = false; + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + vcore->mpp_buffer = (void *)__get_free_pages( + GFP_KERNEL|__GFP_ZERO, + 
MPP_BUFFER_ORDER); + return vcore; } @@ -1586,6 +1601,33 @@ static int on_primary_thread(void) return 1; } +static void kvmppc_start_saving_l2_cache(struct kvmppc_vcore *vc) +{ + phys_addr_t phy_addr, mpp_addr; + + phy_addr = (phys_addr_t)virt_to_phys(vc->mpp_buffer); + mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK; + + mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_ABORT); + logmpp(mpp_addr | PPC_LOGMPP_LOG_L2); + + vc->mpp_buffer_is_valid = true; +} + +static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc) +{ + phys_addr_t phy_addr, mpp_addr; + + phy_addr = virt_to_phys(vc->mpp_buffer); + mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK; + + /* We must abort any in-progress save operations to ensure + * the table is valid so that prefetch engine knows when to + * stop prefetching. */ + logmpp(mpp_addr | PPC_LOGMPP_LOG_ABORT); + mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE); +} + /* * Run a set of guest threads on a physical core. * Called with vc->lock held. @@ -1663,9 +1705,16 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) srcu_idx = srcu_read_lock(&vc->kvm->srcu); + if (vc->mpp_buffer_is_valid) + kvmppc_start_restoring_l2_cache(vc); + __kvmppc_vcore_entry(); spin_lock(&vc->lock); + + if (vc->mpp_buffer) + kvmppc_start_saving_l2_cache(vc); + /* disable sending of IPIs on virtual external irqs */ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) vcpu->cpu = -1; @@ -2413,8 +2462,14 @@ static void kvmppc_free_vcores(struct kvm *kvm) { long int i; - for (i = 0; i < KVM_MAX_VCORES; ++i) + for (i = 0; i < KVM_MAX_VCORES; ++i) { + if (kvm->arch.vcores[i] && kvm->arch.vcores[i]->mpp_buffer) { + struct kvmppc_vcore *vc = kvm->arch.vcores[i]; + free_pages((unsigned long)vc->mpp_buffer, + MPP_BUFFER_ORDER); + } kfree(kvm->arch.vcores[i]); + } kvm->arch.online_vcores = 0; } -- cgit v0.10.2 From 784aa3d7fb6f729c06d5836c9d9569f58e4d05ae Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 14 Jul 2014 18:27:35 +0200 Subject: KVM: Rename and add argument to check_extension In preparation to make the check_extension function available to VM scope we add a struct kvm * argument to the function header and rename the function accordingly. It will still be called from the /dev/kvm fd, but with a NULL argument for struct kvm *. 
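After the rename, the two kinds of call site look like this (schematically; the real hunks follow):

	/* from the /dev/kvm system ioctl: no VM context exists yet */
	r = kvm_vm_ioctl_check_extension_generic(NULL, arg);

	/* later, from a VM fd, a real struct kvm * can be passed instead */
	r = kvm_vm_ioctl_check_extension_generic(kvm, arg);

Passing NULL preserves the existing system-ioctl behaviour while leaving room for per-VM answers in the follow-up patches.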
Signed-off-by: Alexander Graf Acked-by: Paolo Bonzini diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 3c82b37..cb77f999 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -184,7 +184,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) } } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; switch (ext) { diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 6a4309b..0729ba6 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -190,7 +190,7 @@ void kvm_arch_check_processor_compat(void *rtn) *(int *)rtn = 0; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index d687c6e..3ca79aa 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -885,7 +885,7 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8e03568..d870bac 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -391,7 +391,7 @@ void kvm_arch_sync_events(struct kvm *kvm) { } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; /* FIXME!! diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2f3e14f..00268ca 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -146,7 +146,7 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5a8691b..5a62d91 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2616,7 +2616,7 @@ out: return r; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ec4e3bd..5065b95 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -602,7 +602,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf); -int kvm_dev_ioctl_check_extension(long ext); +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, int *is_dirty); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4b6c01b..e28f3ca 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2571,7 +2571,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return r; } -static long kvm_dev_ioctl_check_extension_generic(long arg) +static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) { switch (arg) { case KVM_CAP_USER_MEMORY: @@ -2595,7 +2595,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) default: break; } - return kvm_dev_ioctl_check_extension(arg); + return kvm_vm_ioctl_check_extension(kvm, arg); } static long kvm_dev_ioctl(struct file *filp, @@ -2614,7 +2614,7 @@ static long kvm_dev_ioctl(struct file *filp, r = kvm_dev_ioctl_create_vm(arg); break; case KVM_CHECK_EXTENSION: - r = kvm_dev_ioctl_check_extension_generic(arg); + r = kvm_vm_ioctl_check_extension_generic(NULL, arg); break; case 
KVM_GET_VCPU_MMAP_SIZE: r = -EINVAL; -- cgit v0.10.2 From 92b591a4c46b103ebd3fc0d03a084e1efd331253 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 14 Jul 2014 18:33:08 +0200 Subject: KVM: Allow KVM_CHECK_EXTENSION on the vm fd The KVM_CHECK_EXTENSION is only available on the kvm fd today. Unfortunately on PPC some of the capabilities change depending on the way a VM was created. So instead we need a way to expose capabilities as VM ioctl, so that we can see which VM type we're using (HV or PR). To enable this, add the KVM_CHECK_EXTENSION ioctl to our vm ioctl portfolio. Signed-off-by: Alexander Graf Acked-by: Paolo Bonzini diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 884f819..8898caf 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -148,9 +148,9 @@ of banks, as set via the KVM_X86_SETUP_MCE ioctl. 4.4 KVM_CHECK_EXTENSION -Capability: basic +Capability: basic, KVM_CAP_CHECK_EXTENSION_VM for vm ioctl Architectures: all -Type: system ioctl +Type: system ioctl, vm ioctl Parameters: extension identifier (KVM_CAP_*) Returns: 0 if unsupported; 1 (or some other positive integer) if supported @@ -160,6 +160,9 @@ receives an integer that describes the extension availability. Generally 0 means no and 1 means yes, but some extensions may report additional information in the integer return value. +Based on their initialization different VMs may have different capabilities. +It is thus encouraged to use the vm ioctl to query for capabilities (available +with KVM_CAP_CHECK_EXTENSION_VM on the vm fd) 4.5 KVM_GET_VCPU_MMAP_SIZE diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0418b74..51776ca 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -759,6 +759,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_ARM_PSCI_0_2 102 #define KVM_CAP_PPC_FIXUP_HCALL 103 #define KVM_CAP_PPC_ENABLE_HCALL 104 +#define KVM_CAP_CHECK_EXTENSION_VM 105 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e28f3ca..1b95cc9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2324,6 +2324,34 @@ static int kvm_ioctl_create_device(struct kvm *kvm, return 0; } +static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) +{ + switch (arg) { + case KVM_CAP_USER_MEMORY: + case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: + case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: +#ifdef CONFIG_KVM_APIC_ARCHITECTURE + case KVM_CAP_SET_BOOT_CPU_ID: +#endif + case KVM_CAP_INTERNAL_ERROR_DATA: +#ifdef CONFIG_HAVE_KVM_MSI + case KVM_CAP_SIGNAL_MSI: +#endif +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_CAP_IRQFD_RESAMPLE: +#endif + case KVM_CAP_CHECK_EXTENSION_VM: + return 1; +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_CAP_IRQ_ROUTING: + return KVM_MAX_IRQ_ROUTES; +#endif + default: + break; + } + return kvm_vm_ioctl_check_extension(kvm, arg); +} + static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2487,6 +2515,9 @@ static long kvm_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_CHECK_EXTENSION: + r = kvm_vm_ioctl_check_extension_generic(kvm, arg); + break; default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); if (r == -ENOTTY) @@ -2571,33 +2602,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return r; } -static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) -{ - switch (arg) { - case KVM_CAP_USER_MEMORY: - case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: - case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: -#ifdef 
CONFIG_KVM_APIC_ARCHITECTURE - case KVM_CAP_SET_BOOT_CPU_ID: -#endif - case KVM_CAP_INTERNAL_ERROR_DATA: -#ifdef CONFIG_HAVE_KVM_MSI - case KVM_CAP_SIGNAL_MSI: -#endif -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - case KVM_CAP_IRQFD_RESAMPLE: -#endif - return 1; -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - case KVM_CAP_IRQ_ROUTING: - return KVM_MAX_IRQ_ROUTES; -#endif - default: - break; - } - return kvm_vm_ioctl_check_extension(kvm, arg); -} - static long kvm_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { -- cgit v0.10.2 From 7a58777a33128b28379851b94070934b17ed9176 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 14 Jul 2014 18:55:19 +0200 Subject: KVM: PPC: Book3S: Provide different CAPs based on HV or PR mode With Book3S KVM we can create both PR and HV VMs in parallel on the same machine. That gives us new challenges on the CAPs we return - both have different capabilities. When we get asked about CAPs on the kvm fd, there's nothing we can do. We can try to be smart and assume we're running HV if HV is available, PR otherwise. However with the newly added VM CHECK_EXTENSION we can now ask for capabilities directly on a VM which knows whether it's PR or HV. With this patch I can successfully expose KVM PVINFO data to user space in the PR case, fixing magic page mapping for PAPR guests. Signed-off-by: Alexander Graf Acked-by: Paolo Bonzini diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index d870bac..eaa57da 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -394,11 +394,17 @@ void kvm_arch_sync_events(struct kvm *kvm) int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; - /* FIXME!! - * Should some of this be vm ioctl ? is it possible now ? - */ + /* Assume we're using HV mode when the HV module is loaded */ int hv_enabled = kvmppc_hv_ops ? 1 : 0; + if (kvm) { + /* + * Hooray - we know which VM type we're running on. Depend on + * that rather than the guess above. + */ + hv_enabled = is_kvmppc_hv_enabled(kvm); + } + switch (ext) { #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_SREGS: -- cgit v0.10.2 From 63fff5c1e3695ff5c9ad8c198a3b03ca4c0c73a8 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 29 Jun 2014 16:47:30 +0530 Subject: KVM: PPC: BOOK3S: HV: Update compute_tlbie_rb to handle 16MB base page When calculating the lower bits of AVA field, use the shift count based on the base page size. Also add the missing segment size and remove stale comment. Signed-off-by: Aneesh Kumar K.V Acked-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index e504f88..07cf9df 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -147,6 +147,8 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, */ /* This covers 14..54 bits of va*/ rb = (v & ~0x7fUL) << 16; /* AVA field */ + + rb |= v >> (62 - 8); /* B field */ /* * AVA in v had cleared lower 23 bits. 
We need to derive * that from pteg index @@ -177,10 +179,10 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, { int aval_shift; /* - * remaining 7bits of AVA/LP fields + * remaining bits of AVA/LP fields * Also contain the rr bits of LP */ - rb |= (va_low & 0x7f) << 16; + rb |= (va_low << mmu_psize_defs[b_psize].shift) & 0x7ff000; /* * Now clear not needed LP bits based on actual psize */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c470d55..27cced9 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2064,12 +2064,6 @@ static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps, (*sps)->page_shift = def->shift; (*sps)->slb_enc = def->sllp; (*sps)->enc[0].page_shift = def->shift; - /* - * Only return base page encoding. We don't want to return - * all the supporting pte_enc, because our H_ENTER doesn't - * support MPSS yet. Once they do, we can start passing all - * support pte_enc here - */ (*sps)->enc[0].pte_enc = def->penc[linux_psize]; /* * Add 16MB MPSS support if host supports it -- cgit v0.10.2 From 7d15c06f1abfe4b893c6c2c8a306b02210a6a6db Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 20 Jun 2014 13:52:36 +0200 Subject: KVM: PPC: Implement kvmppc_xlate for all targets We have a nice API to find the translated GPAs of a GVA including protection flags. So far we only use it on Book3S, but there's no reason the same shouldn't be used on BookE as well. Implement a kvmppc_xlate() version for BookE and clean it up to make it more readable in general. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index e381363..1a60af9 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -52,6 +52,16 @@ enum instruction_type { INST_SC, /* system call */ }; +enum xlate_instdata { + XLATE_INST, /* translate instruction address */ + XLATE_DATA /* translate data address */ +}; + +enum xlate_readwrite { + XLATE_READ, /* check for read permissions */ + XLATE_WRITE /* check for write permissions */ +}; + extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern void kvmppc_handler_highmem(void); @@ -94,6 +104,9 @@ extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, gva_t eaddr); extern void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu); +extern int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, + enum xlate_instdata xlid, enum xlate_readwrite xlrw, + struct kvmppc_pte *pte); extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index a3cbada..0b6c84e 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -380,9 +380,11 @@ pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, } EXPORT_SYMBOL_GPL(kvmppc_gpa_to_pfn); -static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, - bool iswrite, struct kvmppc_pte *pte) +int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid, + enum xlate_readwrite xlrw, struct kvmppc_pte *pte) { + bool data = (xlid == XLATE_DATA); + bool iswrite = (xlrw == XLATE_WRITE); int relocated = (kvmppc_get_msr(vcpu) & (data ? 
MSR_DR : MSR_IR)); int r; @@ -434,7 +436,8 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, vcpu->stat.st++; - r = kvmppc_xlate(vcpu, *eaddr, data, true, &pte); + r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, + XLATE_WRITE, &pte); if (r < 0) return r; @@ -459,7 +462,8 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, vcpu->stat.ld++; - rc = kvmppc_xlate(vcpu, *eaddr, data, false, &pte); + rc = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, + XLATE_READ, &pte); if (rc) return rc; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 97bcde2..2f697b4 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1785,6 +1785,57 @@ void kvm_guest_protect_msr(struct kvm_vcpu *vcpu, ulong prot_bitmap, bool set) #endif } +int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid, + enum xlate_readwrite xlrw, struct kvmppc_pte *pte) +{ + int gtlb_index; + gpa_t gpaddr; + +#ifdef CONFIG_KVM_E500V2 + if (!(vcpu->arch.shared->msr & MSR_PR) && + (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) { + pte->eaddr = eaddr; + pte->raddr = (vcpu->arch.magic_page_pa & PAGE_MASK) | + (eaddr & ~PAGE_MASK); + pte->vpage = eaddr >> PAGE_SHIFT; + pte->may_read = true; + pte->may_write = true; + pte->may_execute = true; + + return 0; + } +#endif + + /* Check the guest TLB. */ + switch (xlid) { + case XLATE_INST: + gtlb_index = kvmppc_mmu_itlb_index(vcpu, eaddr); + break; + case XLATE_DATA: + gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr); + break; + default: + BUG(); + } + + /* Do we have a TLB entry at all? */ + if (gtlb_index < 0) + return -ENOENT; + + gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr); + + pte->eaddr = eaddr; + pte->raddr = (gpaddr & PAGE_MASK) | (eaddr & ~PAGE_MASK); + pte->vpage = eaddr >> PAGE_SHIFT; + + /* XXX read permissions from the guest TLB */ + pte->may_read = true; + pte->may_write = true; + pte->may_execute = true; + + return 0; +} + int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { -- cgit v0.10.2 From 35c4a7330dbe1ae6f590a5645b185e35ddb3f6d9 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 20 Jun 2014 13:58:16 +0200 Subject: KVM: PPC: Move kvmppc_ld/st to common code We have enough common infrastructure now to resolve GVA->GPA mappings at runtime. With this we can move our book3s specific helpers to load / store in guest virtual address space to common code as well. 
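For orientation, the moved helpers keep their existing contract. A minimal caller-side sketch (illustrative only, not part of this patch; the function name is made up and the error mapping is simplified):

	/*
	 * Hypothetical caller: fetch the current instruction through the
	 * now-common kvmppc_ld() helper.
	 */
	static int example_fetch_inst(struct kvm_vcpu *vcpu, u32 *inst)
	{
		ulong eaddr = kvmppc_get_pc(vcpu);
		int r = kvmppc_ld(vcpu, &eaddr, sizeof(*inst), inst, false);

		/*
		 * On EMULATE_DONE, eaddr has been rewritten to the translated
		 * guest physical address; negative values are xlate errors.
		 */
		return (r == EMULATE_DONE) ? 0 : -EFAULT;
	}
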
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index a86ca65..172fd6d 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -148,8 +148,8 @@ extern void kvmppc_mmu_hpte_sysexit(void); extern int kvmppc_mmu_hv_init(void); extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc); +/* XXX remove this export when load_last_inst() is generic */ extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); -extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 11385bb..66f5b59 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -111,15 +111,15 @@ struct kvm_vcpu_stat { u32 halt_wakeup; u32 dbell_exits; u32 gdbell_exits; + u32 ld; + u32 st; #ifdef CONFIG_PPC_BOOK3S u32 pf_storage; u32 pf_instruc; u32 sp_storage; u32 sp_instruc; u32 queue_intr; - u32 ld; u32 ld_slow; - u32 st; u32 st_slow; #endif }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 1a60af9..17fa277 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -80,6 +80,10 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, u32 *inst); +extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data); +extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data); extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 0b6c84e..de8da33 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -410,87 +410,6 @@ int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid, return r; } -static hva_t kvmppc_bad_hva(void) -{ - return PAGE_OFFSET; -} - -static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) -{ - hva_t hpage; - - hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); - if (kvm_is_error_hva(hpage)) - goto err; - - return hpage | (pte->raddr & ~PAGE_MASK); -err: - return kvmppc_bad_hva(); -} - -int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, - bool data) -{ - struct kvmppc_pte pte; - int r; - - vcpu->stat.st++; - - r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, - XLATE_WRITE, &pte); - if (r < 0) - return r; - - *eaddr = pte.raddr; - - if (!pte.may_write) - return -EPERM; - - if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) - return EMULATE_DO_MMIO; - - return EMULATE_DONE; -} -EXPORT_SYMBOL_GPL(kvmppc_st); - -int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, - bool data) -{ - struct kvmppc_pte pte; - hva_t hva = *eaddr; - int rc; - - vcpu->stat.ld++; - - rc = kvmppc_xlate(vcpu, *eaddr, data ? 
XLATE_DATA : XLATE_INST, - XLATE_READ, &pte); - if (rc) - return rc; - - *eaddr = pte.raddr; - - if (!pte.may_read) - return -EPERM; - - if (!data && !pte.may_execute) - return -ENOEXEC; - - hva = kvmppc_pte_to_hva(vcpu, &pte); - if (kvm_is_error_hva(hva)) - goto mmio; - - if (copy_from_user(ptr, (void __user *)hva, size)) { - printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); - goto mmio; - } - - return EMULATE_DONE; - -mmio: - return EMULATE_DO_MMIO; -} -EXPORT_SYMBOL_GPL(kvmppc_ld); - int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, u32 *inst) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index eaa57da..2c5a1c3 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -309,6 +309,87 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio); +static hva_t kvmppc_bad_hva(void) +{ + return PAGE_OFFSET; +} + +static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) +{ + hva_t hpage; + + hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); + if (kvm_is_error_hva(hpage)) + goto err; + + return hpage | (pte->raddr & ~PAGE_MASK); +err: + return kvmppc_bad_hva(); +} + +int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data) +{ + struct kvmppc_pte pte; + int r; + + vcpu->stat.st++; + + r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, + XLATE_WRITE, &pte); + if (r < 0) + return r; + + *eaddr = pte.raddr; + + if (!pte.may_write) + return -EPERM; + + if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) + return EMULATE_DO_MMIO; + + return EMULATE_DONE; +} +EXPORT_SYMBOL_GPL(kvmppc_st); + +int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data) +{ + struct kvmppc_pte pte; + hva_t hva = *eaddr; + int rc; + + vcpu->stat.ld++; + + rc = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, + XLATE_READ, &pte); + if (rc) + return rc; + + *eaddr = pte.raddr; + + if (!pte.may_read) + return -EPERM; + + if (!data && !pte.may_execute) + return -ENOEXEC; + + hva = kvmppc_pte_to_hva(vcpu, &pte); + if (kvm_is_error_hva(hva)) + goto mmio; + + if (copy_from_user(ptr, (void __user *)hva, size)) { + printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); + goto mmio; + } + + return EMULATE_DONE; + +mmio: + return EMULATE_DO_MMIO; +} +EXPORT_SYMBOL_GPL(kvmppc_ld); + int kvm_arch_hardware_enable(void *garbage) { return 0; -- cgit v0.10.2 From 9897e88a79e1c6a3c5bbe74eccd64c4ba32a3b19 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 20 Jun 2014 13:59:43 +0200 Subject: KVM: PPC: Remove kvmppc_bad_hva() We have a proper define for invalid HVA numbers. Use those instead of the ppc specific kvmppc_bad_hva(). 
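For context, the generic error value relies on the same observation the ppc helper did - a kernel address can never be a valid userspace HVA. Roughly, from include/linux/kvm_host.h (reproduced from the generic code of that era for reference; check the tree you build against):

	#define KVM_HVA_ERR_BAD		(PAGE_OFFSET)
	#define KVM_HVA_ERR_RO_BAD	(PAGE_OFFSET + PAGE_SIZE)

	static inline bool kvm_is_error_hva(unsigned long addr)
	{
		return addr >= PAGE_OFFSET;
	}
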
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2c5a1c3..3d59730 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -309,11 +309,6 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio); -static hva_t kvmppc_bad_hva(void) -{ - return PAGE_OFFSET; -} - static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) { hva_t hpage; @@ -324,7 +319,7 @@ static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) return hpage | (pte->raddr & ~PAGE_MASK); err: - return kvmppc_bad_hva(); + return KVM_HVA_ERR_BAD; } int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, -- cgit v0.10.2 From c45c551403f0a7b152e56c53735b954faa36c54c Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 20 Jun 2014 14:17:30 +0200 Subject: KVM: PPC: Use kvm_read_guest in kvmppc_ld We have a nice and handy helper to read from guest physical address space, so we should make use of it in kvmppc_ld as we already do for its counterpart in kvmppc_st. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3d59730..be40886 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -309,19 +309,6 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio); -static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) -{ - hva_t hpage; - - hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); - if (kvm_is_error_hva(hpage)) - goto err; - - return hpage | (pte->raddr & ~PAGE_MASK); -err: - return KVM_HVA_ERR_BAD; -} - int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) { @@ -351,7 +338,6 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) { struct kvmppc_pte pte; - hva_t hva = *eaddr; int rc; vcpu->stat.ld++; @@ -369,19 +355,10 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, if (!data && !pte.may_execute) return -ENOEXEC; - hva = kvmppc_pte_to_hva(vcpu, &pte); - if (kvm_is_error_hva(hva)) - goto mmio; - - if (copy_from_user(ptr, (void __user *)hva, size)) { - printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); - goto mmio; - } + if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size)) + return EMULATE_DO_MMIO; return EMULATE_DONE; - -mmio: - return EMULATE_DO_MMIO; } EXPORT_SYMBOL_GPL(kvmppc_ld); -- cgit v0.10.2 From c12fb43c2f6d6a57a4e21afe74ff56485d699ee7 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 20 Jun 2014 14:43:36 +0200 Subject: KVM: PPC: Handle magic page in kvmppc_ld/st We use kvmppc_ld and kvmppc_st to emulate load/store instructions that may as well access the magic page. Special case it out so that we can properly access it. 
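The guard for that override reads more easily as a single predicate; here is a sketch mirroring the conditions added below (the helper name is invented for illustration):

	/*
	 * Sketch: true when a kvmppc_ld/st access should be redirected to
	 * the magic page instead of going through kvm_read/write_guest().
	 */
	static bool example_is_magic_page_access(struct kvm_vcpu *vcpu, gpa_t raddr)
	{
		ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK;

		return kvmppc_supports_magic_page(vcpu) &&	/* PR or e500v2 only */
		       mp_pa &&					/* guest has mapped it */
		       ((raddr & KVM_PAM & PAGE_MASK) == mp_pa) &&
		       !(kvmppc_get_msr(vcpu) & MSR_PR);	/* privileged guest mode */
	}
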
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 172fd6d..6166791 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -286,6 +286,13 @@ static inline bool is_kvmppc_resume_guest(int r) return (r == RESUME_GUEST || r == RESUME_GUEST_NV); } +static inline bool is_kvmppc_hv_enabled(struct kvm *kvm); +static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu) +{ + /* Only PR KVM supports the magic page */ + return !is_kvmppc_hv_enabled(vcpu->kvm); +} + /* Magic register values loaded into r3 and r4 before the 'sc' assembly * instruction for the OSI hypercalls */ #define OSI_SC_MAGIC_R3 0x113724FA diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index cbb1990..f7aa5cc 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -103,4 +103,14 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) { return vcpu->arch.fault_dear; } + +static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu) +{ + /* Magic page is only supported on e500v2 */ +#ifdef CONFIG_KVM_E500V2 + return true; +#else + return false; +#endif +} #endif /* __ASM_KVM_BOOKE_H__ */ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index be40886..544d1d3 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -312,6 +312,7 @@ EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio); int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) { + ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK; struct kvmppc_pte pte; int r; @@ -327,6 +328,16 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, if (!pte.may_write) return -EPERM; + /* Magic page override */ + if (kvmppc_supports_magic_page(vcpu) && mp_pa && + ((pte.raddr & KVM_PAM & PAGE_MASK) == mp_pa) && + !(kvmppc_get_msr(vcpu) & MSR_PR)) { + void *magic = vcpu->arch.shared; + magic += pte.eaddr & 0xfff; + memcpy(magic, ptr, size); + return EMULATE_DONE; + } + if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) return EMULATE_DO_MMIO; @@ -337,6 +348,7 @@ EXPORT_SYMBOL_GPL(kvmppc_st); int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) { + ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK; struct kvmppc_pte pte; int rc; @@ -355,6 +367,16 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, if (!data && !pte.may_execute) return -ENOEXEC; + /* Magic page override */ + if (kvmppc_supports_magic_page(vcpu) && mp_pa && + ((pte.raddr & KVM_PAM & PAGE_MASK) == mp_pa) && + !(kvmppc_get_msr(vcpu) & MSR_PR)) { + void *magic = vcpu->arch.shared; + magic += pte.eaddr & 0xfff; + memcpy(ptr, magic, size); + return EMULATE_DONE; + } + if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size)) return EMULATE_DO_MMIO; -- cgit v0.10.2 From d69614a295aef72f8fb22da8e3ccf1a8f19a7ffc Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 18 Jun 2014 14:53:49 +0200 Subject: KVM: PPC: Separate loadstore emulation from priv emulation Today the instruction emulator can get called via 2 separate code paths. It can either be called by MMIO emulation detection code or by privileged instruction traps. This is bad, as both code paths prepare the environment differently. For MMIO emulation we already know the virtual address we faulted on, so instructions there don't have to actually fetch that information. 
Split out the two separate use cases into separate files. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 17fa277..2214ee6 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -86,6 +86,7 @@ extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); +extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 777f894..1ccd7a1 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -13,8 +13,9 @@ common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ CFLAGS_e500_mmu.o := -I. CFLAGS_e500_mmu_host.o := -I. CFLAGS_emulate.o := -I. +CFLAGS_emulate_loadstore.o := -I. -common-objs-y += powerpc.o emulate.o +common-objs-y += powerpc.o emulate.o emulate_loadstore.o obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o @@ -91,6 +92,7 @@ kvm-book3s_64-module-objs += \ $(KVM)/eventfd.o \ powerpc.o \ emulate.o \ + emulate_loadstore.o \ book3s.o \ book3s_64_vio.o \ book3s_rtas.o \ diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index c5c64b6..e96b50d 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -207,25 +207,12 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) return emulated; } -/* XXX to do: - * lhax - * lhaux - * lswx - * lswi - * stswx - * stswi - * lha - * lhau - * lmw - * stmw - * - */ /* XXX Should probably auto-generate instruction decoding for a particular core * from opcode tables in the future. 
*/ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) { u32 inst; - int ra, rs, rt, sprn; + int rs, rt, sprn; enum emulation_result emulated; int advance = 1; @@ -238,7 +225,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) pr_debug("Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); - ra = get_ra(inst); rs = get_rs(inst); rt = get_rt(inst); sprn = get_sprn(inst); @@ -270,200 +256,24 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) #endif advance = 0; break; - case OP_31_XOP_LWZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - break; - - case OP_31_XOP_LBZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - break; - - case OP_31_XOP_LBZUX: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_STWX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 1); - break; - - case OP_31_XOP_STBX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - break; - - case OP_31_XOP_STBUX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_LHAX: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - break; - - case OP_31_XOP_LHZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - break; - - case OP_31_XOP_LHZUX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; case OP_31_XOP_MFSPR: emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt); break; - case OP_31_XOP_STHX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - break; - - case OP_31_XOP_STHUX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - case OP_31_XOP_MTSPR: emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); break; - case OP_31_XOP_DCBST: - case OP_31_XOP_DCBF: - case OP_31_XOP_DCBI: - /* Do nothing. The guest is performing dcbi because - * hardware DMA is not snooped by the dcache, but - * emulated DMA either goes through the dcache as - * normal writes, or the host kernel has handled dcache - * coherence. */ - break; - - case OP_31_XOP_LWBRX: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); - break; - case OP_31_XOP_TLBSYNC: break; - case OP_31_XOP_STWBRX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 0); - break; - - case OP_31_XOP_LHBRX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); - break; - - case OP_31_XOP_STHBRX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 0); - break; - default: /* Attempt core-specific emulation below. */ emulated = EMULATE_FAIL; } break; - case OP_LWZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - break; - - /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. 
*/ - case OP_LD: - rt = get_rt(inst); - emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); - break; - - case OP_LWZU: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LBZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - break; - - case OP_LBZU: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STW: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 1); - break; - - /* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */ - case OP_STD: - rs = get_rs(inst); - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 8, 1); - break; - - case OP_STWU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STB: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - break; - - case OP_STBU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LHZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - break; - - case OP_LHZU: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LHA: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - break; - - case OP_LHAU: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STH: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - break; - - case OP_STHU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - default: emulated = EMULATE_FAIL; } diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c new file mode 100644 index 0000000..0de4ffa --- /dev/null +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -0,0 +1,272 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * Copyright 2011 Freescale Semiconductor, Inc. 
+ * + * Authors: Hollis Blanchard + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include "timing.h" +#include "trace.h" + +/* XXX to do: + * lhax + * lhaux + * lswx + * lswi + * stswx + * stswi + * lha + * lhau + * lmw + * stmw + * + */ +int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 inst; + int ra, rs, rt; + enum emulation_result emulated; + int advance = 1; + + /* this default type might be overwritten by subcategories */ + kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); + + emulated = kvmppc_get_last_inst(vcpu, false, &inst); + if (emulated != EMULATE_DONE) + return emulated; + + ra = get_ra(inst); + rs = get_rs(inst); + rt = get_rt(inst); + + switch (get_op(inst)) { + case 31: + switch (get_xop(inst)) { + case OP_31_XOP_LWZX: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + break; + + case OP_31_XOP_LBZX: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + break; + + case OP_31_XOP_LBZUX: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_STWX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 1); + break; + + case OP_31_XOP_STBX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + break; + + case OP_31_XOP_STBUX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_LHAX: + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + break; + + case OP_31_XOP_LHZX: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + break; + + case OP_31_XOP_LHZUX: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_STHX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + break; + + case OP_31_XOP_STHUX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_DCBST: + case OP_31_XOP_DCBF: + case OP_31_XOP_DCBI: + /* Do nothing. The guest is performing dcbi because + * hardware DMA is not snooped by the dcache, but + * emulated DMA either goes through the dcache as + * normal writes, or the host kernel has handled dcache + * coherence. */ + break; + + case OP_31_XOP_LWBRX: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); + break; + + case OP_31_XOP_STWBRX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 0); + break; + + case OP_31_XOP_LHBRX: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); + break; + + case OP_31_XOP_STHBRX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 0); + break; + + default: + emulated = EMULATE_FAIL; + break; + } + break; + + case OP_LWZ: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + break; + + /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. 
*/ + case OP_LD: + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); + break; + + case OP_LWZU: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_LBZ: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + break; + + case OP_LBZU: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_STW: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 1); + break; + + /* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */ + case OP_STD: + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 8, 1); + break; + + case OP_STWU: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_STB: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + break; + + case OP_STBU: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_LHZ: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + break; + + case OP_LHZU: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_LHA: + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + break; + + case OP_LHAU: + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_STH: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + break; + + case OP_STHU: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + default: + emulated = EMULATE_FAIL; + break; + } + + if (emulated == EMULATE_FAIL) { + advance = 0; + kvmppc_core_queue_program(vcpu, 0); + } + + trace_kvm_ppc_instr(inst, kvmppc_get_pc(vcpu), emulated); + + /* Advance past emulated instruction. */ + if (advance) + kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); + + return emulated; +} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 544d1d3..c14ed15 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -272,7 +272,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) enum emulation_result er; int r; - er = kvmppc_emulate_instruction(run, vcpu); + er = kvmppc_emulate_loadstore(vcpu); switch (er) { case EMULATE_DONE: /* Future optimization: only reload non-volatiles if they were -- cgit v0.10.2 From 8de12015ff75967b16f70e5938b151390dac9b77 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 18 Jun 2014 21:56:55 +0200 Subject: KVM: PPC: Expose helper functions for data/inst faults We're going to implement guest code interpretation in KVM for some rare corner cases. This code needs to be able to inject data and instruction faults into the guest when it encounters them. Expose generic APIs to do this in a reasonably subarch agnostic fashion. 
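Typical call sites in such an interpreter would look like this (a sketch; the choice of DSISR_PROTFAULT as the fault reason is an example, not taken from this patch):

	/* Data-side fault: raise a DSI at the faulting effective address. */
	kvmppc_core_queue_data_storage(vcpu, eaddr, DSISR_PROTFAULT);

	/* Instruction-side fault: raise an ISI with the matching SRR1 bits. */
	kvmppc_core_queue_inst_storage(vcpu, SRR1_ISI_PROT);
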
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 2214ee6..cbee453 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -132,6 +132,14 @@ extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, ulong dear_flags, + ulong esr_flags); +extern void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, + ulong dear_flags, + ulong esr_flags); +extern void kvmppc_core_queue_itlb_miss(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, + ulong esr_flags); extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index de8da33..dd03f6b 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -230,6 +230,23 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); } +void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar, + ulong flags) +{ + kvmppc_set_dar(vcpu, dar); + kvmppc_set_dsisr(vcpu, flags); + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); +} + +void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags) +{ + u64 msr = kvmppc_get_msr(vcpu); + msr &= ~(SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT); + msr |= flags & (SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT); + kvmppc_set_msr_fast(vcpu, msr); + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); +} + int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) { int deliver = 1; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 2f697b4..f30948a 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -185,24 +185,28 @@ static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, set_bit(priority, &vcpu->arch.pending_exceptions); } -static void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, - ulong dear_flags, ulong esr_flags) +void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, + ulong dear_flags, ulong esr_flags) { vcpu->arch.queued_dear = dear_flags; vcpu->arch.queued_esr = esr_flags; kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); } -static void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, - ulong dear_flags, ulong esr_flags) +void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, + ulong dear_flags, ulong esr_flags) { vcpu->arch.queued_dear = dear_flags; vcpu->arch.queued_esr = esr_flags; kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE); } -static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, - ulong esr_flags) +void kvmppc_core_queue_itlb_miss(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS); +} + +void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong esr_flags) { vcpu->arch.queued_esr = esr_flags; kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); -- cgit v0.10.2 From ce91ddc471b77ec75e5b2a43c803efac605f37b3 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 28 Jul 2014 19:29:13 +0200 Subject: KVM: PPC: Remove DCR handling DCR handling was only needed for 440 KVM. 
Since we removed it, we can also remove handling of DCR accesses. Signed-off-by: Alexander Graf diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 8898caf..a21ff22 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2613,8 +2613,8 @@ The 'data' member contains, in its first 'len' bytes, the value as it would appear if the VCPU performed a load or store of the appropriate width directly to the byte array. -NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_DCR, - KVM_EXIT_PAPR and KVM_EXIT_EPR the corresponding +NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR and + KVM_EXIT_EPR the corresponding operations are complete (and guest state is consistent) only after userspace has re-entered the kernel with KVM_RUN. The kernel side will first finish incomplete operations and then check for pending signals. Userspace @@ -2685,7 +2685,7 @@ Principles of Operation Book in the Chapter for Dynamic Address Translation __u8 is_write; } dcr; -powerpc specific. +Deprecated - was used for 440 KVM. /* KVM_EXIT_OSI */ struct { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 66f5b59..98d9dd5 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -94,7 +94,6 @@ struct kvm_vm_stat { struct kvm_vcpu_stat { u32 sum_exits; u32 mmio_exits; - u32 dcr_exits; u32 signal_exits; u32 light_exits; /* Account for special types of light exits: */ @@ -126,7 +125,6 @@ struct kvm_vcpu_stat { enum kvm_exit_types { MMIO_EXITS, - DCR_EXITS, SIGNAL_EXITS, ITLB_REAL_MISS_EXITS, ITLB_VIRT_MISS_EXITS, @@ -601,8 +599,6 @@ struct kvm_vcpu_arch { u8 io_gpr; /* GPR used as IO source/target */ u8 mmio_is_bigendian; u8 mmio_sign_extend; - u8 dcr_needed; - u8 dcr_is_write; u8 osi_needed; u8 osi_enabled; u8 papr_enabled; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index cbee453..8e36c1e 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -41,7 +41,6 @@ enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ - EMULATE_DO_DCR, /* kvm_run filled with DCR request */ EMULATE_FAIL, /* can't emulate this instruction */ EMULATE_AGAIN, /* something went wrong. 
go again */ EMULATE_EXIT_USER, /* emulation requires exit to user-space */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index f30948a..b4c89fa 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -51,7 +51,6 @@ unsigned long kvmppc_booke_handlers; struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmio", VCPU_STAT(mmio_exits) }, - { "dcr", VCPU_STAT(dcr_exits) }, { "sig", VCPU_STAT(signal_exits) }, { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, @@ -709,10 +708,6 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) case EMULATE_AGAIN: return RESUME_GUEST; - case EMULATE_DO_DCR: - run->exit_reason = KVM_EXIT_DCR; - return RESUME_HOST; - case EMULATE_FAIL: printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", __func__, vcpu->arch.pc, vcpu->arch.last_inst); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index c14ed15..288b4bb 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -743,12 +743,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) #endif } -static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, - struct kvm_run *run) -{ - kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data); -} - static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { @@ -945,10 +939,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (!vcpu->mmio_is_write) kvmppc_complete_mmio_load(vcpu, run); vcpu->mmio_needed = 0; - } else if (vcpu->arch.dcr_needed) { - if (!vcpu->arch.dcr_is_write) - kvmppc_complete_dcr_load(vcpu, run); - vcpu->arch.dcr_needed = 0; } else if (vcpu->arch.osi_needed) { u64 *gprs = run->osi.gprs; int i; diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index 07b6110..e44d2b2 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -110,7 +110,6 @@ void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = { [MMIO_EXITS] = "MMIO", - [DCR_EXITS] = "DCR", [SIGNAL_EXITS] = "SIGNAL", [ITLB_REAL_MISS_EXITS] = "ITLBREAL", [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT", diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h index bf191e7..3123690 100644 --- a/arch/powerpc/kvm/timing.h +++ b/arch/powerpc/kvm/timing.h @@ -63,9 +63,6 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type) case EMULATED_INST_EXITS: vcpu->stat.emulated_inst_exits++; break; - case DCR_EXITS: - vcpu->stat.dcr_exits++; - break; case DSI_EXITS: vcpu->stat.dsi_exits++; break; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 51776ca..f6f24ae 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -162,7 +162,7 @@ struct kvm_pit_config { #define KVM_EXIT_TPR_ACCESS 12 #define KVM_EXIT_S390_SIEIC 13 #define KVM_EXIT_S390_RESET 14 -#define KVM_EXIT_DCR 15 +#define KVM_EXIT_DCR 15 /* deprecated */ #define KVM_EXIT_NMI 16 #define KVM_EXIT_INTERNAL_ERROR 17 #define KVM_EXIT_OSI 18 @@ -268,7 +268,7 @@ struct kvm_run { __u64 trans_exc_code; __u32 pgm_code; } s390_ucontrol; - /* KVM_EXIT_DCR */ + /* KVM_EXIT_DCR (deprecated) */ struct { __u32 dcrn; __u32 data; -- cgit v0.10.2 From 5a484c7c1efd2c45f8cc726e4d21283a5324e361 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 30 Jul 2014 15:03:56 +0530 Subject: KVM: PPC: BOOKEHV: rename e500hv_spr to bookehv_spr These are not specific to e500hv but applicable to bookehv (as per a comment from Scott Wood on my patch "kvm: 
ppc: bookehv: Added wrapper macros for shadow registers") Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 8e36c1e..fb86a22 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -539,16 +539,16 @@ static inline bool kvmppc_shared_big_endian(struct kvm_vcpu *vcpu) #endif } -#define SPRNG_WRAPPER_GET(reg, e500hv_spr) \ +#define SPRNG_WRAPPER_GET(reg, bookehv_spr) \ static inline ulong kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ { \ - return mfspr(e500hv_spr); \ + return mfspr(bookehv_spr); \ } \ -#define SPRNG_WRAPPER_SET(reg, e500hv_spr) \ +#define SPRNG_WRAPPER_SET(reg, bookehv_spr) \ static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, ulong val) \ { \ - mtspr(e500hv_spr, val); \ + mtspr(bookehv_spr, val); \ } \ #define SHARED_WRAPPER_GET(reg, size) \ @@ -573,18 +573,18 @@ static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val) \ SHARED_WRAPPER_GET(reg, size) \ SHARED_WRAPPER_SET(reg, size) \ -#define SPRNG_WRAPPER(reg, e500hv_spr) \ - SPRNG_WRAPPER_GET(reg, e500hv_spr) \ - SPRNG_WRAPPER_SET(reg, e500hv_spr) \ +#define SPRNG_WRAPPER(reg, bookehv_spr) \ + SPRNG_WRAPPER_GET(reg, bookehv_spr) \ + SPRNG_WRAPPER_SET(reg, bookehv_spr) \ #ifdef CONFIG_KVM_BOOKE_HV -#define SHARED_SPRNG_WRAPPER(reg, size, e500hv_spr) \ - SPRNG_WRAPPER(reg, e500hv_spr) \ +#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr) \ + SPRNG_WRAPPER(reg, bookehv_spr) \ #else -#define SHARED_SPRNG_WRAPPER(reg, size, e500hv_spr) \ +#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr) \ SHARED_WRAPPER(reg, size) \ #endif -- cgit v0.10.2 From 29577fc00ba40a89fc824f030bcc80c583259346 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 30 Jul 2014 15:25:48 +0200 Subject: KVM: PPC: HV: Remove generic instruction emulation Now that we have properly split load/store instruction emulation and generic instruction emulation, we can move the generic one from kvm.ko to kvm-pr.ko on book3s_64. This reduces the attack surface and amount of code loaded on HV KVM kernels. 
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 1ccd7a1..2d590de 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -48,6 +48,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \ kvm-pr-y := \ fpu.o \ + emulate.o \ book3s_paired_singles.o \ book3s_pr.o \ book3s_pr_papr.o \ @@ -91,7 +92,6 @@ kvm-book3s_64-module-objs += \ $(KVM)/kvm_main.o \ $(KVM)/eventfd.o \ powerpc.o \ - emulate.o \ emulate_loadstore.o \ book3s.o \ book3s_64_vio.o \ diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h index e1357cd..a674f09 100644 --- a/arch/powerpc/kvm/trace_pr.h +++ b/arch/powerpc/kvm/trace_pr.h @@ -291,6 +291,26 @@ TRACE_EVENT(kvm_unmap_hva, TP_printk("unmap hva 0x%lx\n", __entry->hva) ); +TRACE_EVENT(kvm_ppc_instr, + TP_PROTO(unsigned int inst, unsigned long _pc, unsigned int emulate), + TP_ARGS(inst, _pc, emulate), + + TP_STRUCT__entry( + __field( unsigned int, inst ) + __field( unsigned long, pc ) + __field( unsigned int, emulate ) + ), + + TP_fast_assign( + __entry->inst = inst; + __entry->pc = _pc; + __entry->emulate = emulate; + ), + + TP_printk("inst %u pc 0x%lx emulate %u\n", + __entry->inst, __entry->pc, __entry->emulate) +); + #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ -- cgit v0.10.2 From 8e6afa36e754be84b468d7df9e5aa71cf4003f3b Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 31 Jul 2014 10:21:59 +0200 Subject: KVM: PPC: PR: Handle FSCR feature deselects We handle FSCR feature bits (well, really only TAR today) lazily, when the guest starts using them. So when a guest activates the bit and later uses that feature, we enable it for real in hardware. However, when the guest stops using that bit, we don't stop setting it in hardware. That means we can potentially lose a trap that the guest expects to happen because it thinks a feature is not active. This patch adds support for dropping TAR when the guest turns it off in FSCR. While at it, it also restricts FSCR access to 64bit systems - 32bit ones don't have it. 
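The guest-visible sequence this fixes, sketched from the guest side (illustrative only):

	/* Guest deselects the TAR facility in FSCR ... */
	mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) & ~FSCR_TAR);

	/*
	 * ... and now expects TAR accesses to trap with a facility
	 * unavailable interrupt. Without dropping the shadow TAR in
	 * kvmppc_set_fscr(), this access would silently succeed instead.
	 */
	mtspr(SPRN_TAR, 0);
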
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 6166791..6acf0c2 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -182,6 +182,7 @@ extern long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long *map); extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask); +extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr); extern void kvmppc_entry_trampoline(void); extern void kvmppc_hv_entry_trampoline(void); diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 84fddcd..5a2bc4b 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -449,10 +449,10 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_GQR7: to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val; break; +#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_FSCR: - vcpu->arch.fscr = spr_val; + kvmppc_set_fscr(vcpu, spr_val); break; -#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_BESCR: vcpu->arch.bescr = spr_val; break; @@ -593,10 +593,10 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val case SPRN_GQR7: *spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]; break; +#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_FSCR: *spr_val = vcpu->arch.fscr; break; -#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_BESCR: *spr_val = vcpu->arch.bescr; break; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e7a1fa2..faffb27 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -871,6 +871,15 @@ static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac) return RESUME_GUEST; } + +void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr) +{ + if ((vcpu->arch.fscr & FSCR_TAR) && !(fscr & FSCR_TAR)) { + /* TAR got dropped, drop it in shadow too */ + kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); + } + vcpu->arch.fscr = fscr; +} #endif int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, -- cgit v0.10.2