From e59d24e61269de34d79d2f39d3d581c219ac7a94 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Thu, 6 Feb 2014 17:36:56 +0100 Subject: KVM: PPC: Book3S HV: Fix incorrect userspace exit on ioeventfd write When the guest does an MMIO write which is handled successfully by an ioeventfd, ioeventfd_write() returns 0 (success) and kvmppc_handle_store() returns EMULATE_DONE. Then kvmppc_emulate_mmio() converts EMULATE_DONE to RESUME_GUEST_NV and this causes an exit from the loop in kvmppc_vcpu_run_hv(), causing an exit back to userspace with a bogus exit reason code, typically causing userspace (e.g. qemu) to crash with a message about an unknown exit code. This adds handling of RESUME_GUEST_NV in kvmppc_vcpu_run_hv() in order to fix that. For generality, we define a helper to check for either of the return-to-guest codes we use, RESUME_GUEST and RESUME_GUEST_NV, to make it easy to check for either and provide one place to update if any other return-to-guest code gets defined in future. Since it only affects Book3S HV for now, the helper is added to the kvm_book3s.h header file. We use the helper in two places in kvmppc_run_core() as well for future-proofing, though we don't see RESUME_GUEST_NV in either place at present. [paulus@samba.org - combined 4 patches into one, rewrote description] Suggested-by: Paul Mackerras Signed-off-by: Alexey Kardashevskiy Signed-off-by: Greg Kurz Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 83851aa..bb1e38a2 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -304,6 +304,11 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) return vcpu->arch.fault_dar; } +static inline bool is_kvmppc_resume_guest(int r) +{ + return (r == RESUME_GUEST || r == RESUME_GUEST_NV); +} + /* Magic register values loaded into r3 and r4 before the 'sc' assembly * instruction for the OSI hypercalls */ #define OSI_SC_MAGIC_R3 0x113724FA diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 17fc949..3b498d9 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1530,7 +1530,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) vcpu->arch.trap = 0; if (vcpu->arch.ceded) { - if (ret != RESUME_GUEST) + if (!is_kvmppc_resume_guest(ret)) kvmppc_end_cede(vcpu); else kvmppc_set_timer(vcpu); @@ -1541,7 +1541,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) vc->vcore_state = VCORE_INACTIVE; list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, arch.run_list) { - if (vcpu->arch.ret != RESUME_GUEST) { + if (!is_kvmppc_resume_guest(vcpu->arch.ret)) { kvmppc_remove_runnable(vc, vcpu); wake_up(&vcpu->arch.cpu_run); } @@ -1731,7 +1731,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); } - } while (r == RESUME_GUEST); + } while (is_kvmppc_resume_guest(r)); out: vcpu->arch.state = KVMPPC_VCPU_NOTREADY; -- cgit v0.10.2 From 69e9fbb278af8de3059f1d1017b52a32b5f9f0bd Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Fri, 21 Feb 2014 16:31:10 +0100 Subject: KVM: PPC: Book3S: Introduce hypervisor call H_GET_TCE This introduces the H_GET_TCE hypervisor call, which is basically the reverse of H_PUT_TCE, as defined in the Power Architecture Platform Requirements (PAPR). The hcall H_GET_TCE is required by the kdump kernel, which uses it to retrieve TCEs set up by the previous (panicked) kernel. Signed-off-by: Laurent Dufour Signed-off-by: Alexander Graf Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index fcd53f0..4096f16 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -129,6 +129,8 @@ extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args); extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce); +extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, + unsigned long ioba); extern struct kvm_rma_info *kvm_alloc_rma(void); extern void kvm_release_rma(struct kvm_rma_info *ri); extern struct page *kvm_alloc_hpt(unsigned long nr_pages); diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 2c25f54..89e96b3 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -75,3 +75,31 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, return H_TOO_HARD; } EXPORT_SYMBOL_GPL(kvmppc_h_put_tce); + +long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, + unsigned long ioba) +{ + struct kvm *kvm = vcpu->kvm; + struct kvmppc_spapr_tce_table *stt; + + list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { + if (stt->liobn == liobn) { + unsigned long idx = ioba >> SPAPR_TCE_SHIFT; + struct page *page; + u64 *tbl; + + if (ioba >= stt->window_size) + return H_PARAMETER; + + page = stt->pages[idx / TCES_PER_PAGE]; + tbl = (u64 *)page_address(page); + + vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE]; + return H_SUCCESS; + } + } + + /* Didn't find the liobn, punt it to userspace */ + return H_TOO_HARD; +} +EXPORT_SYMBOL_GPL(kvmppc_h_get_tce); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 818dce3..7c5788c 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1691,7 +1691,7 @@ hcall_real_table: .long 0 /* 0x10 - H_CLEAR_MOD */ .long 0 /* 0x14 - H_CLEAR_REF */ .long .kvmppc_h_protect - hcall_real_table - .long 0 /* 0x1c - H_GET_TCE */ + .long .kvmppc_h_get_tce - hcall_real_table .long .kvmppc_h_put_tce - hcall_real_table .long 0 /* 0x24 - H_SET_SPRG0 */ .long .kvmppc_h_set_dabr - hcall_real_table -- cgit v0.10.2 From 7505258c5fcb0a1cc3c76a47b4cf9506d21d10e6 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 25 Mar 2014 10:47:01 +1100 Subject: KVM: PPC: Book3S HV: Fix KVM hang with CONFIG_KVM_XICS=n I noticed KVM is broken when KVM in-kernel XICS emulation (CONFIG_KVM_XICS) is disabled. The problem was introduced in 48eaef05 (KVM: PPC: Book3S HV: use xics_wake_cpu only when defined). It used CONFIG_KVM_XICS to wrap xics_wake_cpu, where CONFIG_PPC_ICP_NATIVE should have been used. Signed-off-by: Anton Blanchard Cc: stable@vger.kernel.org Signed-off-by: Paul Mackerras Acked-by: Scott Wood diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 3b498d9..e0a535c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -86,7 +86,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) /* CPU points to the first thread of the core */ if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) { -#ifdef CONFIG_KVM_XICS +#ifdef CONFIG_PPC_ICP_NATIVE int real_cpu = cpu + vcpu->arch.ptid; if (paca[real_cpu].kvm_hstate.xics_phys) xics_wake_cpu(real_cpu); @@ -1360,9 +1360,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu) smp_wmb(); #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) if (cpu != smp_processor_id()) { -#ifdef CONFIG_KVM_XICS xics_wake_cpu(cpu); -#endif if (vcpu->arch.ptid) ++vc->n_woken; } -- cgit v0.10.2 From e4e38121507a27d2ccc4b28d9e7fc4818a12c44c Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 25 Mar 2014 10:47:02 +1100 Subject: KVM: PPC: Book3S HV: Add transactional memory support This adds saving of the transactional memory (TM) checkpointed state on guest entry and exit. We only do this if we see that the guest has an active transaction. It also adds emulation of the TM state changes when delivering IRQs into the guest. According to the architecture, if we are transactional when an IRQ occurs, the TM state is changed to suspended, otherwise it's left unchanged. Signed-off-by: Michael Neuling Signed-off-by: Paul Mackerras Acked-by: Scott Wood diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 6ba8d4a..af21e87 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -213,6 +213,7 @@ #define SPRN_ACOP 0x1F /* Available Coprocessor Register */ #define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */ #define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */ +#define TEXASR_FS __MASK(63-36) /* Transaction Failure Summary */ #define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */ #define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ #define SPRN_CTRLF 0x088 diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h index 9dfbc34..386a3ef 100644 --- a/arch/powerpc/include/asm/tm.h +++ b/arch/powerpc/include/asm/tm.h @@ -7,6 +7,8 @@ #include +#ifndef __ASSEMBLY__ + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM extern void do_load_up_transact_fpu(struct thread_struct *thread); extern void do_load_up_transact_altivec(struct thread_struct *thread); @@ -20,3 +22,5 @@ extern void tm_recheckpoint(struct thread_struct *thread, extern void tm_abort(uint8_t cause); extern void tm_save_sprs(struct thread_struct *thread); extern void tm_restore_sprs(struct thread_struct *thread); + +#endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 303ece7..fb25ebc 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -262,7 +262,14 @@ int kvmppc_mmu_hv_init(void) static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) { - kvmppc_set_msr(vcpu, vcpu->arch.intr_msr); + unsigned long msr = vcpu->arch.intr_msr; + + /* If transactional, change to suspend mode on IRQ delivery */ + if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr)) + msr |= MSR_TS_S; + else + msr |= vcpu->arch.shregs.msr & MSR_TS_MASK; + kvmppc_set_msr(vcpu, msr); } /* diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 7c5788c..61190dd 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -28,6 +28,9 @@ #include #include #include +#include + +#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) #ifdef __LITTLE_ENDIAN__ #error Need to fix lppaca and SLB shadow accesses in little endian mode @@ -597,6 +600,116 @@ BEGIN_FTR_SECTION END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89) END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION + b skip_tm +END_FTR_SECTION_IFCLR(CPU_FTR_TM) + + /* Turn on TM/FP/VSX/VMX so we can restore them. */ + mfmsr r5 + li r6, MSR_TM >> 32 + sldi r6, r6, 32 + or r5, r5, r6 + ori r5, r5, MSR_FP + oris r5, r5, (MSR_VEC | MSR_VSX)@h + mtmsrd r5 + + /* + * The user may change these outside of a transaction, so they must + * always be context switched. + */ + ld r5, VCPU_TFHAR(r4) + ld r6, VCPU_TFIAR(r4) + ld r7, VCPU_TEXASR(r4) + mtspr SPRN_TFHAR, r5 + mtspr SPRN_TFIAR, r6 + mtspr SPRN_TEXASR, r7 + + ld r5, VCPU_MSR(r4) + rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 + beq skip_tm /* TM not active in guest */ + + /* Make sure the failure summary is set, otherwise we'll program check + * when we trechkpt. It's possible that this might have been not set + * on a kvmppc_set_one_reg() call but we shouldn't let this crash the + * host. + */ + oris r7, r7, (TEXASR_FS)@h + mtspr SPRN_TEXASR, r7 + + /* + * We need to load up the checkpointed state for the guest. + * We need to do this early as it will blow away any GPRs, VSRs and + * some SPRs. + */ + + mr r31, r4 + addi r3, r31, VCPU_FPRS_TM + bl .load_fp_state + addi r3, r31, VCPU_VRS_TM + bl .load_vr_state + mr r4, r31 + lwz r7, VCPU_VRSAVE_TM(r4) + mtspr SPRN_VRSAVE, r7 + + ld r5, VCPU_LR_TM(r4) + lwz r6, VCPU_CR_TM(r4) + ld r7, VCPU_CTR_TM(r4) + ld r8, VCPU_AMR_TM(r4) + ld r9, VCPU_TAR_TM(r4) + mtlr r5 + mtcr r6 + mtctr r7 + mtspr SPRN_AMR, r8 + mtspr SPRN_TAR, r9 + + /* + * Load up PPR and DSCR values but don't put them in the actual SPRs + * till the last moment to avoid running with userspace PPR and DSCR for + * too long. + */ + ld r29, VCPU_DSCR_TM(r4) + ld r30, VCPU_PPR_TM(r4) + + std r2, PACATMSCRATCH(r13) /* Save TOC */ + + /* Clear the MSR RI since r1, r13 are all going to be foobar. */ + li r5, 0 + mtmsrd r5, 1 + + /* Load GPRs r0-r28 */ + reg = 0 + .rept 29 + ld reg, VCPU_GPRS_TM(reg)(r31) + reg = reg + 1 + .endr + + mtspr SPRN_DSCR, r29 + mtspr SPRN_PPR, r30 + + /* Load final GPRs */ + ld 29, VCPU_GPRS_TM(29)(r31) + ld 30, VCPU_GPRS_TM(30)(r31) + ld 31, VCPU_GPRS_TM(31)(r31) + + /* TM checkpointed state is now setup. All GPRs are now volatile. */ + TRECHKPT + + /* Now let's get back the state we need. */ + HMT_MEDIUM + GET_PACA(r13) + ld r29, HSTATE_DSCR(r13) + mtspr SPRN_DSCR, r29 + ld r4, HSTATE_KVM_VCPU(r13) + ld r1, HSTATE_HOST_R1(r13) + ld r2, PACATMSCRATCH(r13) + + /* Set the MSR RI since we have our registers back. */ + li r5, MSR_RI + mtmsrd r5, 1 +skip_tm: +#endif + /* Load guest PMU registers */ /* R4 is live here (vcpu pointer) */ li r3, 1 @@ -704,14 +817,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) ld r6, VCPU_VTB(r4) mtspr SPRN_IC, r5 mtspr SPRN_VTB, r6 -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - ld r5, VCPU_TFHAR(r4) - ld r6, VCPU_TFIAR(r4) - ld r7, VCPU_TEXASR(r4) - mtspr SPRN_TFHAR, r5 - mtspr SPRN_TFIAR, r6 - mtspr SPRN_TEXASR, r7 -#endif ld r8, VCPU_EBBHR(r4) mtspr SPRN_EBBHR, r8 ld r5, VCPU_EBBRR(r4) @@ -817,7 +922,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 12: mtspr SPRN_SRR0, r10 mr r10,r0 mtspr SPRN_SRR1, r11 - ld r11, VCPU_INTR_MSR(r4) + mr r9, r4 + bl kvmppc_msr_interrupt 5: /* @@ -1103,12 +1209,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201) BEGIN_FTR_SECTION b 8f END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) - /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */ - mfmsr r8 - li r0, 1 - rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG - mtmsrd r8 - /* Save POWER8-specific registers */ mfspr r5, SPRN_IAMR mfspr r6, SPRN_PSPB @@ -1122,14 +1222,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) std r5, VCPU_IC(r9) std r6, VCPU_VTB(r9) std r7, VCPU_TAR(r9) -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - mfspr r5, SPRN_TFHAR - mfspr r6, SPRN_TFIAR - mfspr r7, SPRN_TEXASR - std r5, VCPU_TFHAR(r9) - std r6, VCPU_TFIAR(r9) - std r7, VCPU_TEXASR(r9) -#endif mfspr r8, SPRN_EBBHR std r8, VCPU_EBBHR(r9) mfspr r5, SPRN_EBBRR @@ -1557,7 +1649,7 @@ kvmppc_hdsi: mtspr SPRN_SRR0, r10 mtspr SPRN_SRR1, r11 li r10, BOOK3S_INTERRUPT_DATA_STORAGE - ld r11, VCPU_INTR_MSR(r9) + bl kvmppc_msr_interrupt fast_interrupt_c_return: 6: ld r7, VCPU_CTR(r9) lwz r8, VCPU_XER(r9) @@ -1626,7 +1718,7 @@ kvmppc_hisi: 1: mtspr SPRN_SRR0, r10 mtspr SPRN_SRR1, r11 li r10, BOOK3S_INTERRUPT_INST_STORAGE - ld r11, VCPU_INTR_MSR(r9) + bl kvmppc_msr_interrupt b fast_interrupt_c_return 3: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */ @@ -1669,7 +1761,7 @@ sc_1_fast_return: mtspr SPRN_SRR0,r10 mtspr SPRN_SRR1,r11 li r10, BOOK3S_INTERRUPT_SYSCALL - ld r11, VCPU_INTR_MSR(r9) + bl kvmppc_msr_interrupt mr r4,r9 b fast_guest_return @@ -1997,7 +2089,7 @@ machine_check_realmode: beq mc_cont /* If not, deliver a machine check. SRR0/1 are already set */ li r10, BOOK3S_INTERRUPT_MACHINE_CHECK - ld r11, VCPU_INTR_MSR(r9) + bl kvmppc_msr_interrupt b fast_interrupt_c_return /* @@ -2138,8 +2230,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) mfspr r6,SPRN_VRSAVE stw r6,VCPU_VRSAVE(r31) mtlr r30 - mtmsrd r5 - isync blr /* @@ -2186,3 +2276,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) */ kvmppc_bad_host_intr: b . + +/* + * This mimics the MSR transition on IRQ delivery. The new guest MSR is taken + * from VCPU_INTR_MSR and is modified based on the required TM state changes. + * r11 has the guest MSR value (in/out) + * r9 has a vcpu pointer (in) + * r0 is used as a scratch register + */ +kvmppc_msr_interrupt: + rldicl r0, r11, 64 - MSR_TS_S_LG, 62 + cmpwi r0, 2 /* Check if we are in transactional state.. */ + ld r11, VCPU_INTR_MSR(r9) + bne 1f + /* ... if transactional, change to suspended */ + li r0, 1 +1: rldimi r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG + blr -- cgit v0.10.2 From a7d80d01c68ed7d3fbc7bcf4541e6fb7e6b87cd6 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 25 Mar 2014 10:47:03 +1100 Subject: KVM: PPC: Book3S HV: Add get/set_one_reg for new TM state This adds code to get/set_one_reg to read and write the new transactional memory (TM) state. Signed-off-by: Michael Neuling Signed-off-by: Paul Mackerras Acked-by: Scott Wood diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e0a535c..a6d8f01 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -879,17 +879,6 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_IAMR: *val = get_reg_val(id, vcpu->arch.iamr); break; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - case KVM_REG_PPC_TFHAR: - *val = get_reg_val(id, vcpu->arch.tfhar); - break; - case KVM_REG_PPC_TFIAR: - *val = get_reg_val(id, vcpu->arch.tfiar); - break; - case KVM_REG_PPC_TEXASR: - *val = get_reg_val(id, vcpu->arch.texasr); - break; -#endif case KVM_REG_PPC_FSCR: *val = get_reg_val(id, vcpu->arch.fscr); break; @@ -970,6 +959,69 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_PPR: *val = get_reg_val(id, vcpu->arch.ppr); break; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + case KVM_REG_PPC_TFHAR: + *val = get_reg_val(id, vcpu->arch.tfhar); + break; + case KVM_REG_PPC_TFIAR: + *val = get_reg_val(id, vcpu->arch.tfiar); + break; + case KVM_REG_PPC_TEXASR: + *val = get_reg_val(id, vcpu->arch.texasr); + break; + case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31: + i = id - KVM_REG_PPC_TM_GPR0; + *val = get_reg_val(id, vcpu->arch.gpr_tm[i]); + break; + case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63: + { + int j; + i = id - KVM_REG_PPC_TM_VSR0; + if (i < 32) + for (j = 0; j < TS_FPRWIDTH; j++) + val->vsxval[j] = vcpu->arch.fp_tm.fpr[i][j]; + else { + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + val->vval = vcpu->arch.vr_tm.vr[i-32]; + else + r = -ENXIO; + } + break; + } + case KVM_REG_PPC_TM_CR: + *val = get_reg_val(id, vcpu->arch.cr_tm); + break; + case KVM_REG_PPC_TM_LR: + *val = get_reg_val(id, vcpu->arch.lr_tm); + break; + case KVM_REG_PPC_TM_CTR: + *val = get_reg_val(id, vcpu->arch.ctr_tm); + break; + case KVM_REG_PPC_TM_FPSCR: + *val = get_reg_val(id, vcpu->arch.fp_tm.fpscr); + break; + case KVM_REG_PPC_TM_AMR: + *val = get_reg_val(id, vcpu->arch.amr_tm); + break; + case KVM_REG_PPC_TM_PPR: + *val = get_reg_val(id, vcpu->arch.ppr_tm); + break; + case KVM_REG_PPC_TM_VRSAVE: + *val = get_reg_val(id, vcpu->arch.vrsave_tm); + break; + case KVM_REG_PPC_TM_VSCR: + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + *val = get_reg_val(id, vcpu->arch.vr_tm.vscr.u[3]); + else + r = -ENXIO; + break; + case KVM_REG_PPC_TM_DSCR: + *val = get_reg_val(id, vcpu->arch.dscr_tm); + break; + case KVM_REG_PPC_TM_TAR: + *val = get_reg_val(id, vcpu->arch.tar_tm); + break; +#endif case KVM_REG_PPC_ARCH_COMPAT: *val = get_reg_val(id, vcpu->arch.vcore->arch_compat); break; @@ -1039,17 +1091,6 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_IAMR: vcpu->arch.iamr = set_reg_val(id, *val); break; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - case KVM_REG_PPC_TFHAR: - vcpu->arch.tfhar = set_reg_val(id, *val); - break; - case KVM_REG_PPC_TFIAR: - vcpu->arch.tfiar = set_reg_val(id, *val); - break; - case KVM_REG_PPC_TEXASR: - vcpu->arch.texasr = set_reg_val(id, *val); - break; -#endif case KVM_REG_PPC_FSCR: vcpu->arch.fscr = set_reg_val(id, *val); break; @@ -1144,6 +1185,68 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_PPR: vcpu->arch.ppr = set_reg_val(id, *val); break; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + case KVM_REG_PPC_TFHAR: + vcpu->arch.tfhar = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TFIAR: + vcpu->arch.tfiar = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TEXASR: + vcpu->arch.texasr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31: + i = id - KVM_REG_PPC_TM_GPR0; + vcpu->arch.gpr_tm[i] = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63: + { + int j; + i = id - KVM_REG_PPC_TM_VSR0; + if (i < 32) + for (j = 0; j < TS_FPRWIDTH; j++) + vcpu->arch.fp_tm.fpr[i][j] = val->vsxval[j]; + else + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + vcpu->arch.vr_tm.vr[i-32] = val->vval; + else + r = -ENXIO; + break; + } + case KVM_REG_PPC_TM_CR: + vcpu->arch.cr_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_LR: + vcpu->arch.lr_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_CTR: + vcpu->arch.ctr_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_FPSCR: + vcpu->arch.fp_tm.fpscr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_AMR: + vcpu->arch.amr_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_PPR: + vcpu->arch.ppr_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_VRSAVE: + vcpu->arch.vrsave_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_VSCR: + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + vcpu->arch.vr.vscr.u[3] = set_reg_val(id, *val); + else + r = - ENXIO; + break; + case KVM_REG_PPC_TM_DSCR: + vcpu->arch.dscr_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_TAR: + vcpu->arch.tar_tm = set_reg_val(id, *val); + break; +#endif case KVM_REG_PPC_ARCH_COMPAT: r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val)); break; -- cgit v0.10.2 From b24f36f33ea088771c2bb7c09e84d0ddea35cf55 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 25 Mar 2014 10:47:04 +1100 Subject: KVM: PPC: Book3S: Trim top 4 bits of physical address in RTAS code The in-kernel emulation of RTAS functions needs to read the argument buffer from guest memory in order to find out what function is being requested. The guest supplies the guest physical address of the buffer, and on a real system the code that reads that buffer would run in guest real mode. In guest real mode, the processor ignores the top 4 bits of the address specified in load and store instructions. In order to emulate that behaviour correctly, we need to mask off those bits before calling kvm_read_guest() or kvm_write_guest(). This adds that masking. Signed-off-by: Paul Mackerras Acked-by: Scott Wood diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index cf95cde..7a05315 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -213,8 +213,11 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) gpa_t args_phys; int rc; - /* r4 contains the guest physical address of the RTAS args */ - args_phys = kvmppc_get_gpr(vcpu, 4); + /* + * r4 contains the guest physical address of the RTAS args + * Mask off the top 4 bits since this is a guest real address + */ + args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM; rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args)); if (rc) -- cgit v0.10.2 From 739e2425fea6349ac674e93648953b3a08985f2f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 25 Mar 2014 10:47:05 +1100 Subject: KVM: PPC: Book3S HV: Return ENODEV error rather than EIO If an attempt is made to load the kvm-hv module on a machine which doesn't have hypervisor mode available, return an ENODEV error, which is the conventional thing to return to indicate that this module is not applicable to the hardware of the current machine, rather than EIO, which causes a warning to be printed. Signed-off-by: Paul Mackerras Acked-by: Scott Wood diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index a6d8f01..8227dba 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2467,7 +2467,7 @@ static int kvmppc_book3s_init_hv(void) */ r = kvmppc_core_check_processor_compat_hv(); if (r < 0) - return r; + return -ENODEV; kvm_ops_hv.owner = THIS_MODULE; kvmppc_hv_ops = &kvm_ops_hv; -- cgit v0.10.2 From 797f9c07eb4cbc2d0ff27fac165a0b885da38840 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 25 Mar 2014 10:47:06 +1100 Subject: KVM: PPC: Book3S HV: Don't use kvm_memslots() in real mode With HV KVM, some high-frequency hypercalls such as H_ENTER are handled in real mode, and need to access the memslots array for the guest. Accessing the memslots array is safe, because we hold the SRCU read lock for the whole time that a guest vcpu is running. However, the checks that kvm_memslots() does when lockdep is enabled are potentially unsafe in real mode, when only the linear mapping is available. Furthermore, kvm_memslots() can be called from a secondary CPU thread, which is an offline CPU from the point of view of the host kernel, and is not running the task which holds the SRCU read lock. To avoid false positives in the checks in kvm_memslots(), and to avoid possible side effects from doing the checks in real mode, this replaces kvm_memslots() with kvm_memslots_raw() in all the places that execute in real mode. kvm_memslots_raw() is a new function that is like kvm_memslots() but uses rcu_dereference_raw_notrace() instead of kvm_dereference_check(). Signed-off-by: Paul Mackerras Acked-by: Scott Wood diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index bf0fa8b0a..51388be 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -289,6 +289,18 @@ static inline void note_hpte_modification(struct kvm *kvm, if (atomic_read(&kvm->arch.hpte_mod_interest)) rev->guest_rpte |= HPTE_GR_MODIFIED; } + +/* + * Like kvm_memslots(), but for use in real mode when we can't do + * any RCU stuff (since the secondary threads are offline from the + * kernel's point of view), and we can't print anything. + * Thus we use rcu_dereference_raw() rather than rcu_dereference_check(). + */ +static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm) +{ + return rcu_dereference_raw_notrace(kvm->memslots); +} + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 37fb3ca..1d6c56a 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -111,7 +111,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index, rcbits = hpte_r & (HPTE_R_R | HPTE_R_C); ptel = rev->guest_rpte |= rcbits; gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); - memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); + memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); if (!memslot) return; @@ -192,7 +192,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, /* Find the memslot (if any) for this address */ gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); gfn = gpa >> PAGE_SHIFT; - memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); + memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); pa = 0; is_io = ~0ul; rmap = NULL; @@ -670,7 +670,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, psize = hpte_page_size(v, r); gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; - memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); + memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); if (memslot) { hva = __gfn_to_hva_memslot(memslot, gfn); pte = lookup_linux_pte_and_update(pgdir, hva, -- cgit v0.10.2 From c5fb80d3b24f6280bd6f608d8f2a02139a0fabaf Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 25 Mar 2014 10:47:07 +1100 Subject: KVM: PPC: Book3S HV: Fix decrementer timeouts with non-zero TB offset Commit c7699822bc21 ("KVM: PPC: Book3S HV: Make physical thread 0 do the MMU switching") reordered the guest entry/exit code so that most of the guest register save/restore code happened in guest MMU context. A side effect of that is that the timebase still contains the guest timebase value at the point where we compute and use vcpu->arch.dec_expires, and therefore that is now a guest timebase value rather than a host timebase value. That in turn means that the timeouts computed in kvmppc_set_timer() are wrong if the timebase offset for the guest is non-zero. The consequence of that is things such as "sleep 1" in a guest after migration may sleep for much longer than they should. This fixes the problem by converting between guest and host timebase values as necessary, by adding or subtracting the timebase offset. This also fixes an incorrect comment. Signed-off-by: Paul Mackerras Acked-by: Scott Wood diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 61190dd..42bd2e6 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -841,6 +841,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) * Set the decrementer to the guest decrementer. */ ld r8,VCPU_DEC_EXPIRES(r4) + /* r8 is a host timebase value here, convert to guest TB */ + ld r5,HSTATE_KVM_VCORE(r13) + ld r6,VCORE_TB_OFFSET(r5) + add r8,r8,r6 mftb r7 subf r3,r7,r8 mtspr SPRN_DEC,r3 @@ -1204,6 +1208,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201) mftb r6 extsw r5,r5 add r5,r5,r6 + /* r5 is a guest timebase value here, convert to host TB */ + ld r3,HSTATE_KVM_VCORE(r13) + ld r4,VCORE_TB_OFFSET(r3) + subf r5,r4,r5 std r5,VCPU_DEC_EXPIRES(r9) BEGIN_FTR_SECTION @@ -1479,7 +1487,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ld r8,VCORE_TB_OFFSET(r5) cmpdi r8,0 beq 17f - mftb r6 /* current host timebase */ + mftb r6 /* current guest timebase */ subf r8,r8,r6 mtspr SPRN_TBU40,r8 /* update upper 40 bits */ mftb r7 /* check if lower 24 bits overflowed */ -- cgit v0.10.2 From 72cde5a88d37ba88ad1d47aecf957a9e528636d7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 25 Mar 2014 10:47:08 +1100 Subject: KVM: PPC: Book3S HV: Save/restore host PMU registers that are new in POWER8 Currently we save the host PMU configuration, counter values, etc., when entering a guest, and restore it on return from the guest. (We have to do this because the guest has control of the PMU while it is executing.) However, we missed saving/restoring the SIAR and SDAR registers, as well as the registers which are new on POWER8, namely SIER and MMCR2. This adds code to save the values of these registers when entering the guest and restore them on exit. This also works around the bug in POWER8 where setting PMAE with a counter already negative doesn't generate an interrupt. Signed-off-by: Paul Mackerras Acked-by: Scott Wood diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index f3a91dc..821725c 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -94,7 +94,7 @@ struct kvmppc_host_state { unsigned long xics_phys; u32 saved_xirr; u64 dabr; - u64 host_mmcr[3]; + u64 host_mmcr[7]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */ u32 host_pmc[8]; u64 host_purr; u64 host_spurr; diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index e873796..e18e3cf 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -71,6 +71,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtmsrd r10,1 /* Save host PMU registers */ +BEGIN_FTR_SECTION + /* Work around P8 PMAE bug */ + li r3, -1 + clrrdi r3, r3, 10 + mfspr r8, SPRN_MMCR2 + mtspr SPRN_MMCR2, r3 /* freeze all counters using MMCR2 */ + isync +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) li r3, 1 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ mfspr r7, SPRN_MMCR0 /* save MMCR0 */ @@ -87,9 +95,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) cmpwi r5, 0 beq 31f /* skip if not */ mfspr r5, SPRN_MMCR1 + mfspr r9, SPRN_SIAR + mfspr r10, SPRN_SDAR std r7, HSTATE_MMCR(r13) std r5, HSTATE_MMCR + 8(r13) std r6, HSTATE_MMCR + 16(r13) + std r9, HSTATE_MMCR + 24(r13) + std r10, HSTATE_MMCR + 32(r13) +BEGIN_FTR_SECTION + mfspr r9, SPRN_SIER + std r8, HSTATE_MMCR + 40(r13) + std r9, HSTATE_MMCR + 48(r13) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r3, SPRN_PMC1 mfspr r5, SPRN_PMC2 mfspr r6, SPRN_PMC3 @@ -110,6 +127,11 @@ BEGIN_FTR_SECTION stw r10, HSTATE_PMC + 24(r13) stw r11, HSTATE_PMC + 28(r13) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) +BEGIN_FTR_SECTION + mfspr r9, SPRN_SIER + std r8, HSTATE_MMCR + 40(r13) + std r9, HSTATE_MMCR + 48(r13) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 31: /* diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 42bd2e6..4963335 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -109,8 +109,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) ld r3, HSTATE_MMCR(r13) ld r4, HSTATE_MMCR + 8(r13) ld r5, HSTATE_MMCR + 16(r13) + ld r6, HSTATE_MMCR + 24(r13) + ld r7, HSTATE_MMCR + 32(r13) mtspr SPRN_MMCR1, r4 mtspr SPRN_MMCRA, r5 + mtspr SPRN_SIAR, r6 + mtspr SPRN_SDAR, r7 +BEGIN_FTR_SECTION + ld r8, HSTATE_MMCR + 40(r13) + ld r9, HSTATE_MMCR + 48(r13) + mtspr SPRN_MMCR2, r8 + mtspr SPRN_SIER, r9 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtspr SPRN_MMCR0, r3 isync 23: -- cgit v0.10.2