From 1ad9f23873a4ee837643be5a29c05e405ec54e18 Mon Sep 17 00:00:00 2001 From: "pingfank@linux.vnet.ibm.com" Date: Tue, 15 Apr 2014 16:33:40 +0800 Subject: KVM: PPC: Book3S: HV: make _PAGE_NUMA take effect Numa fault is a method which help to achieve auto numa balancing. When such a page fault takes place, the page fault handler will check whether the page is placed correctly. If not, migration should be involved to cut down the distance between the cpu and pages. A pte with _PAGE_NUMA help to implement numa fault. It means not to allow the MMU to access the page directly. So a page fault is triggered and numa fault handler gets the opportunity to run checker. As for the access of MMU, we need special handling for the powernv's guest. When we mark a pte with _PAGE_NUMA, we already call mmu_notifier to invalidate it in guest's htab, but when we tried to re-insert them, we firstly try to map it in real-mode. Only after this fails, we fallback to virt mode, and most of important, we run numa fault handler in virt mode. This patch guards the way of real-mode to ensure that if a pte is marked with _PAGE_NUMA, it will NOT be mapped in real mode, instead, it will be mapped in virt mode and have the opportunity to be checked with placement. Signed-off-by: Liu Ping Fan Reviewed-by: Aneesh Kumar K.V Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 1d6c56a..8fcc363 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -234,7 +234,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, pte_size = psize; pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size); - if (pte_present(pte)) { + if (pte_present(pte) && !pte_numa(pte)) { if (writing && !pte_write(pte)) /* make the actual HPTE be read-only */ ptel = hpte_make_readonly(ptel); -- cgit v0.10.2 From 0a8eccefcb3423b45dc0a5a527b4799520e2bc94 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 14 Apr 2014 08:56:26 +1000 Subject: KVM: PPC: Book3S HV: Add missing code for transaction reclaim on guest exit Testing by Michael Neuling revealed that commit e4e38121507a ("KVM: PPC: Book3S HV: Add transactional memory support") is missing the code that saves away the checkpointed state of the guest when switching to the host. This adds that code, which was in earlier versions of the patch but went missing somehow. Reported-by: Michael Neuling Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index ffbb871..0ff4701 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1312,6 +1312,110 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) mr r3, r9 bl kvmppc_save_fp +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION + b 2f +END_FTR_SECTION_IFCLR(CPU_FTR_TM) + /* Turn on TM. */ + mfmsr r8 + li r0, 1 + rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG + mtmsrd r8 + + ld r5, VCPU_MSR(r9) + rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 + beq 1f /* TM not active in guest. */ + + li r3, TM_CAUSE_KVM_RESCHED + + /* Clear the MSR RI since r1, r13 are all going to be foobar. */ + li r5, 0 + mtmsrd r5, 1 + + /* All GPRs are volatile at this point. */ + TRECLAIM(R3) + + /* Temporarily store r13 and r9 so we have some regs to play with */ + SET_SCRATCH0(r13) + GET_PACA(r13) + std r9, PACATMSCRATCH(r13) + ld r9, HSTATE_KVM_VCPU(r13) + + /* Get a few more GPRs free. */ + std r29, VCPU_GPRS_TM(29)(r9) + std r30, VCPU_GPRS_TM(30)(r9) + std r31, VCPU_GPRS_TM(31)(r9) + + /* Save away PPR and DSCR soon so don't run with user values. */ + mfspr r31, SPRN_PPR + HMT_MEDIUM + mfspr r30, SPRN_DSCR + ld r29, HSTATE_DSCR(r13) + mtspr SPRN_DSCR, r29 + + /* Save all but r9, r13 & r29-r31 */ + reg = 0 + .rept 29 + .if (reg != 9) && (reg != 13) + std reg, VCPU_GPRS_TM(reg)(r9) + .endif + reg = reg + 1 + .endr + /* ... now save r13 */ + GET_SCRATCH0(r4) + std r4, VCPU_GPRS_TM(13)(r9) + /* ... and save r9 */ + ld r4, PACATMSCRATCH(r13) + std r4, VCPU_GPRS_TM(9)(r9) + + /* Reload stack pointer and TOC. */ + ld r1, HSTATE_HOST_R1(r13) + ld r2, PACATOC(r13) + + /* Set MSR RI now we have r1 and r13 back. */ + li r5, MSR_RI + mtmsrd r5, 1 + + /* Save away checkpinted SPRs. */ + std r31, VCPU_PPR_TM(r9) + std r30, VCPU_DSCR_TM(r9) + mflr r5 + mfcr r6 + mfctr r7 + mfspr r8, SPRN_AMR + mfspr r10, SPRN_TAR + std r5, VCPU_LR_TM(r9) + stw r6, VCPU_CR_TM(r9) + std r7, VCPU_CTR_TM(r9) + std r8, VCPU_AMR_TM(r9) + std r10, VCPU_TAR_TM(r9) + + /* Restore r12 as trap number. */ + lwz r12, VCPU_TRAP(r9) + + /* Save FP/VSX. */ + addi r3, r9, VCPU_FPRS_TM + bl .store_fp_state + addi r3, r9, VCPU_VRS_TM + bl .store_vr_state + mfspr r6, SPRN_VRSAVE + stw r6, VCPU_VRSAVE_TM(r9) +1: + /* + * We need to save these SPRs after the treclaim so that the software + * error code is recorded correctly in the TEXASR. Also the user may + * change these outside of a transaction, so they must always be + * context switched. + */ + mfspr r5, SPRN_TFHAR + mfspr r6, SPRN_TFIAR + mfspr r7, SPRN_TEXASR + std r5, VCPU_TFHAR(r9) + std r6, VCPU_TFIAR(r9) + std r7, VCPU_TEXASR(r9) +2: +#endif + /* Increment yield count if they have a VPA */ ld r8, VCPU_VPA(r9) /* do they have a VPA? */ cmpdi r8, 0 -- cgit v0.10.2 From ab78475c76bd8c54375d8a778200c59314973d30 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 6 Apr 2014 23:31:48 +0200 Subject: KVM: PPC: Book3S: ifdef on CONFIG_KVM_BOOK3S_32_HANDLER for 32bit The book3s_32 target can get built as module which means we don't see the config define for it in code. Instead, check on the bool define CONFIG_KVM_BOOK3S_32_HANDLER whenever we want to know whether we're building for a book3s_32 host. This fixes running book3s_32 kvm as a module for me. Signed-off-by: Alexander Graf Reviewed-by: Aneesh Kumar K.V diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 94e597e..7af190a 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -886,7 +886,7 @@ static int kvmppc_book3s_init(void) r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); if (r) return r; -#ifdef CONFIG_KVM_BOOK3S_32 +#ifdef CONFIG_KVM_BOOK3S_32_HANDLER r = kvmppc_book3s_init_pr(); #endif return r; @@ -895,7 +895,7 @@ static int kvmppc_book3s_init(void) static void kvmppc_book3s_exit(void) { -#ifdef CONFIG_KVM_BOOK3S_32 +#ifdef CONFIG_KVM_BOOK3S_32_HANDLER kvmppc_book3s_exit_pr(); #endif kvm_exit(); @@ -905,7 +905,7 @@ module_init(kvmppc_book3s_init); module_exit(kvmppc_book3s_exit); /* On 32bit this is our one and only kernel module */ -#ifdef CONFIG_KVM_BOOK3S_32 +#ifdef CONFIG_KVM_BOOK3S_32_HANDLER MODULE_ALIAS_MISCDEV(KVM_MINOR); MODULE_ALIAS("devname:kvm"); #endif diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index c5c052a..02f1def 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1153,7 +1153,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, goto free_vcpu; vcpu->arch.book3s = vcpu_book3s; -#ifdef CONFIG_KVM_BOOK3S_32 +#ifdef CONFIG_KVM_BOOK3S_32_HANDLER vcpu->arch.shadow_vcpu = kzalloc(sizeof(*vcpu->arch.shadow_vcpu), GFP_KERNEL); if (!vcpu->arch.shadow_vcpu) @@ -1198,7 +1198,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, uninit_vcpu: kvm_vcpu_uninit(vcpu); free_shadow_vcpu: -#ifdef CONFIG_KVM_BOOK3S_32 +#ifdef CONFIG_KVM_BOOK3S_32_HANDLER kfree(vcpu->arch.shadow_vcpu); free_vcpu3s: #endif @@ -1215,7 +1215,7 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu) free_page((unsigned long)vcpu->arch.shared & PAGE_MASK); kvm_vcpu_uninit(vcpu); -#ifdef CONFIG_KVM_BOOK3S_32 +#ifdef CONFIG_KVM_BOOK3S_32_HANDLER kfree(vcpu->arch.shadow_vcpu); #endif vfree(vcpu_book3s); -- cgit v0.10.2 From b18db0b80867931f4e3a844400a3c22a4fd2ff57 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 29 Apr 2014 12:17:26 +0200 Subject: KVM guest: Make pv trampoline code executable Our PV guest patching code assembles chunks of instructions on the fly when it encounters more complicated instructions to hijack. These instructions need to live in a section that we don't mark as non-executable, as otherwise we fault when jumping there. Right now we put it into the .bss section where it automatically gets marked as non-executable. Add a check to the NX setting function to ensure that we leave these particular pages executable. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index d0e784e..5217903 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -39,6 +39,17 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end) (unsigned long)_stext < end; } +static inline int overlaps_kvm_tmp(unsigned long start, unsigned long end) +{ +#ifdef CONFIG_KVM_GUEST + extern char kvm_tmp[]; + return start < (unsigned long)kvm_tmp && + (unsigned long)&kvm_tmp[1024 * 1024] < end; +#else + return 0; +#endif +} + #undef dereference_function_descriptor static inline void *dereference_function_descriptor(void *ptr) { diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 6a01752..dd8695f 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -74,7 +74,7 @@ #define KVM_INST_MTSRIN 0x7c0001e4 static bool kvm_patching_worked = true; -static char kvm_tmp[1024 * 1024]; +char kvm_tmp[1024 * 1024]; static int kvm_tmp_index; static inline void kvm_patch_ins(u32 *inst, u32 new_inst) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index d766d6e..06ba83b 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -207,6 +207,10 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, if (overlaps_kernel_text(vaddr, vaddr + step)) tprot &= ~HPTE_R_N; + /* Make kvm guest trampolines executable */ + if (overlaps_kvm_tmp(vaddr, vaddr + step)) + tprot &= ~HPTE_R_N; + /* * If relocatable, check if it overlaps interrupt vectors that * are copied down to real 0. For relocatable kernel -- cgit v0.10.2