From 3cd60e31185343d4132ca7cf3c9becb903b3ec1b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 4 Jun 2014 16:47:55 +0530 Subject: KVM: PPC: BOOK3S: PR: Fix PURR and SPURR emulation We use time base for PURR and SPURR emulation with PR KVM since we are emulating a single threaded core. When using time base we need to make sure that we don't accumulate time spent in the host in PURR and SPURR value. Also we don't need to emulate mtspr because both the registers are hypervisor resource. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index f52f656..a20cc0b 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -83,8 +83,6 @@ struct kvmppc_vcpu_book3s { u64 sdr1; u64 hior; u64 msr_mask; - u64 purr_offset; - u64 spurr_offset; #ifdef CONFIG_PPC_BOOK3S_32 u32 vsid_pool[VSID_POOL_SIZE]; u32 vsid_next; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bb66d8b..4a58731 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -503,8 +503,8 @@ struct kvm_vcpu_arch { #ifdef CONFIG_BOOKE u32 decar; #endif - u32 tbl; - u32 tbu; + /* Time base value when we entered the guest */ + u64 entry_tb; u32 tcr; ulong tsr; /* we need to perform set/clr_bits() which requires ulong */ u32 ivor[64]; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 3f29526..3565e77 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -439,12 +439,6 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) (mfmsr() & MSR_HV)) vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; break; - case SPRN_PURR: - to_book3s(vcpu)->purr_offset = spr_val - get_tb(); - break; - case SPRN_SPURR: - to_book3s(vcpu)->spurr_offset = spr_val - get_tb(); - break; case SPRN_GQR0: case SPRN_GQR1: case SPRN_GQR2: @@ -572,10 +566,16 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val *spr_val = 0; break; case SPRN_PURR: - *spr_val = get_tb() + to_book3s(vcpu)->purr_offset; + /* + * On exit we would have updated purr + */ + *spr_val = vcpu->arch.purr; break; case SPRN_SPURR: - *spr_val = get_tb() + to_book3s(vcpu)->purr_offset; + /* + * On exit we would have updated spurr + */ + *spr_val = vcpu->arch.spurr; break; case SPRN_GQR0: case SPRN_GQR1: diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 8eef1e5..671f5c92 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -120,6 +120,11 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, #ifdef CONFIG_PPC_BOOK3S_64 svcpu->shadow_fscr = vcpu->arch.shadow_fscr; #endif + /* + * Now also save the current time base value. We use this + * to find the guest purr and spurr value. + */ + vcpu->arch.entry_tb = get_tb(); svcpu->in_use = true; } @@ -166,6 +171,12 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, #ifdef CONFIG_PPC_BOOK3S_64 vcpu->arch.shadow_fscr = svcpu->shadow_fscr; #endif + /* + * Update purr and spurr using time base on exit. 
+ */ + vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb; + vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb; + svcpu->in_use = false; out: -- cgit v0.10.2 From 4f853a714bf16338ff5261128e6c7ae2569e9505 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 9 May 2014 23:31:31 +0200 Subject: arm/arm64: KVM: Fix and refactor unmap_range unmap_range() was utterly broken, to quote Marc, and broke in all sorts of situations. It was also quite complicated to follow and didn't follow the usual scheme of having a separate iterating function for each level of page tables. Address this by refactoring the code and introduce a pgd_clear() function. Reviewed-by: Jungseok Lee Reviewed-by: Mario Smarduch Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5c7aa3c..5cc0b0f 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -127,6 +127,18 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) (__boundary - 1 < (end) - 1)? __boundary: (end); \ }) +static inline bool kvm_page_empty(void *ptr) +{ + struct page *ptr_page = virt_to_page(ptr); + return page_count(ptr_page) == 1; +} + + +#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep) +#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp) +#define kvm_pud_table_empty(pudp) (0) + + struct kvm; #define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 16f8049..2336061 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -90,104 +90,115 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) return p; } -static bool page_empty(void *ptr) +static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) { - struct page *ptr_page = virt_to_page(ptr); - return page_count(ptr_page) == 1; + pud_t *pud_table __maybe_unused = pud_offset(pgd, 0); + pgd_clear(pgd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pud_free(NULL, pud_table); + put_page(virt_to_page(pgd)); } static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) { - if (pud_huge(*pud)) { - pud_clear(pud); - kvm_tlb_flush_vmid_ipa(kvm, addr); - } else { - pmd_t *pmd_table = pmd_offset(pud, 0); - pud_clear(pud); - kvm_tlb_flush_vmid_ipa(kvm, addr); - pmd_free(NULL, pmd_table); - } + pmd_t *pmd_table = pmd_offset(pud, 0); + VM_BUG_ON(pud_huge(*pud)); + pud_clear(pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pmd_free(NULL, pmd_table); put_page(virt_to_page(pud)); } static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) { - if (kvm_pmd_huge(*pmd)) { - pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); - } else { - pte_t *pte_table = pte_offset_kernel(pmd, 0); - pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); - pte_free_kernel(NULL, pte_table); - } + pte_t *pte_table = pte_offset_kernel(pmd, 0); + VM_BUG_ON(kvm_pmd_huge(*pmd)); + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pte_free_kernel(NULL, pte_table); put_page(virt_to_page(pmd)); } -static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr) +static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, + phys_addr_t addr, phys_addr_t end) { - if (pte_present(*pte)) { - kvm_set_pte(pte, __pte(0)); - put_page(virt_to_page(pte)); - kvm_tlb_flush_vmid_ipa(kvm, addr); - } + phys_addr_t start_addr = addr; + pte_t *pte, *start_pte; + + start_pte = pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + kvm_set_pte(pte, __pte(0)); + put_page(virt_to_page(pte)); + 
kvm_tlb_flush_vmid_ipa(kvm, addr); + } + } while (pte++, addr += PAGE_SIZE, addr != end); + + if (kvm_pte_table_empty(start_pte)) + clear_pmd_entry(kvm, pmd, start_addr); } -static void unmap_range(struct kvm *kvm, pgd_t *pgdp, - unsigned long long start, u64 size) +static void unmap_pmds(struct kvm *kvm, pud_t *pud, + phys_addr_t addr, phys_addr_t end) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - unsigned long long addr = start, end = start + size; - u64 next; + phys_addr_t next, start_addr = addr; + pmd_t *pmd, *start_pmd; - while (addr < end) { - pgd = pgdp + pgd_index(addr); - pud = pud_offset(pgd, addr); - pte = NULL; - if (pud_none(*pud)) { - addr = kvm_pud_addr_end(addr, end); - continue; - } - - if (pud_huge(*pud)) { - /* - * If we are dealing with a huge pud, just clear it and - * move on. - */ - clear_pud_entry(kvm, pud, addr); - addr = kvm_pud_addr_end(addr, end); - continue; + start_pmd = pmd = pmd_offset(pud, addr); + do { + next = kvm_pmd_addr_end(addr, end); + if (!pmd_none(*pmd)) { + if (kvm_pmd_huge(*pmd)) { + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + put_page(virt_to_page(pmd)); + } else { + unmap_ptes(kvm, pmd, addr, next); + } } + } while (pmd++, addr = next, addr != end); - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - addr = kvm_pmd_addr_end(addr, end); - continue; - } + if (kvm_pmd_table_empty(start_pmd)) + clear_pud_entry(kvm, pud, start_addr); +} - if (!kvm_pmd_huge(*pmd)) { - pte = pte_offset_kernel(pmd, addr); - clear_pte_entry(kvm, pte, addr); - next = addr + PAGE_SIZE; - } +static void unmap_puds(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t next, start_addr = addr; + pud_t *pud, *start_pud; - /* - * If the pmd entry is to be cleared, walk back up the ladder - */ - if (kvm_pmd_huge(*pmd) || (pte && page_empty(pte))) { - clear_pmd_entry(kvm, pmd, addr); - next = kvm_pmd_addr_end(addr, end); - if (page_empty(pmd) && !page_empty(pud)) { - clear_pud_entry(kvm, pud, addr); - next = kvm_pud_addr_end(addr, end); + start_pud = pud = pud_offset(pgd, addr); + do { + next = kvm_pud_addr_end(addr, end); + if (!pud_none(*pud)) { + if (pud_huge(*pud)) { + pud_clear(pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + put_page(virt_to_page(pud)); + } else { + unmap_pmds(kvm, pud, addr, next); } } + } while (pud++, addr = next, addr != end); - addr = next; - } + if (kvm_pud_table_empty(start_pud)) + clear_pgd_entry(kvm, pgd, start_addr); +} + + +static void unmap_range(struct kvm *kvm, pgd_t *pgdp, + phys_addr_t start, u64 size) +{ + pgd_t *pgd; + phys_addr_t addr = start, end = start + size; + phys_addr_t next; + + pgd = pgdp + pgd_index(addr); + do { + next = kvm_pgd_addr_end(addr, end); + unmap_puds(kvm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); } static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 7d29847..8e138c7 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -125,6 +125,21 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) #define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end) #define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end) +static inline bool kvm_page_empty(void *ptr) +{ + struct page *ptr_page = virt_to_page(ptr); + return page_count(ptr_page) == 1; +} + +#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep) +#ifndef CONFIG_ARM64_64K_PAGES +#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp) +#else +#define 
kvm_pmd_table_empty(pmdp) (0) +#endif +#define kvm_pud_table_empty(pudp) (0) + + struct kvm; #define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) -- cgit v0.10.2 From df6ce24f2ee485c4f9a5cb610063a5eb60da8267 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 6 Jun 2014 11:10:23 +0200 Subject: ARM: KVM: Unmap IPA on memslot delete/move Currently when a KVM region is deleted or moved after KVM_SET_USER_MEMORY_REGION ioctl, the corresponding intermediate physical memory is not unmapped. This patch corrects this and unmaps the region's IPA range in kvm_arch_commit_memory_region using unmap_stage2_range. Signed-off-by: Eric Auger Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 3c82b37..d7424ef 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -155,16 +155,6 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) -{ -} - -int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, - unsigned long npages) -{ - return 0; -} /** * kvm_arch_destroy_vm - destroy the VM data structure @@ -225,33 +215,6 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -void kvm_arch_memslots_updated(struct kvm *kvm) -{ -} - -int kvm_arch_prepare_memory_region(struct kvm *kvm, - struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, - enum kvm_mr_change change) -{ - return 0; -} - -void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, - enum kvm_mr_change change) -{ -} - -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ -} - -void kvm_arch_flush_shadow_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ -} struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 2336061..b2a708b 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1111,3 +1111,49 @@ out: free_hyp_pgds(); return err; } + +void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + enum kvm_mr_change change) +{ + gpa_t gpa = old->base_gfn << PAGE_SHIFT; + phys_addr_t size = old->npages << PAGE_SHIFT; + if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) { + spin_lock(&kvm->mmu_lock); + unmap_stage2_range(kvm, gpa, size); + spin_unlock(&kvm->mmu_lock); + } +} + +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) +{ + return 0; +} + +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) +{ + return 0; +} + +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ +} + +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ +} -- cgit v0.10.2 From b88657674d39fc2127d62d0de9ca142e166443c8 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 26 Jun 2014 01:45:51 +0100 Subject: ARM: KVM: user_mem_abort: support stage 2 MMIO page mapping A userspace process can map device MMIO memory via VFIO or /dev/mem, e.g., for platform device passthrough support in QEMU. 
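As a point of reference only (not part of this patch), the /dev/mem route boils
down to a mapping like the sketch below; the physical address and size are
arbitrary placeholders. Once such a VMA backs a KVM memslot, a guest access to
it faults into user_mem_abort(), which is where the stage 2 attributes are
chosen.

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
	/* Placeholder MMIO window; a real user would take these from VFIO or the DT. */
	const off_t mmio_pa = 0x10000000;
	const size_t mmio_size = 0x10000;
	void *va;

	int fd = open("/dev/mem", O_RDWR | O_SYNC);
	if (fd < 0) {
		perror("open /dev/mem");
		return 1;
	}

	/* The resulting VMA can then be handed to KVM via KVM_SET_USER_MEMORY_REGION. */
	va = mmap(NULL, mmio_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, mmio_pa);
	if (va == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}

	munmap(va, mmio_size);
	close(fd);
	return 0;
}
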
During early development, we found the PAGE_S2 memory type being used for MMIO mappings. This patch corrects that by using the more strongly ordered memory type for device MMIO mappings: PAGE_S2_DEVICE. Signed-off-by: Kim Phillips Acked-by: Christoffer Dall Acked-by: Will Deacon Signed-off-by: Marc Zyngier diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index b2a708b..16e7994 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -759,6 +759,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; struct vm_area_struct *vma; pfn_t pfn; + pgprot_t mem_type = PAGE_S2; write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); if (fault_status == FSC_PERM && !write_fault) { @@ -809,6 +810,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (is_error_pfn(pfn)) return -EFAULT; + if (kvm_is_mmio_pfn(pfn)) + mem_type = PAGE_S2_DEVICE; + spin_lock(&kvm->mmu_lock); if (mmu_notifier_retry(kvm, mmu_seq)) goto out_unlock; @@ -816,7 +820,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); if (hugetlb) { - pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2); + pmd_t new_pmd = pfn_pmd(pfn, mem_type); new_pmd = pmd_mkhuge(new_pmd); if (writable) { kvm_set_s2pmd_writable(&new_pmd); @@ -825,13 +829,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE); ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); } else { - pte_t new_pte = pfn_pte(pfn, PAGE_S2); + pte_t new_pte = pfn_pte(pfn, mem_type); if (writable) { kvm_set_s2pte_writable(&new_pte); kvm_set_pfn_dirty(pfn); } coherent_cache_guest_page(vcpu, hva, PAGE_SIZE); - ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false); + ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, + mem_type == PAGE_S2_DEVICE); } -- cgit v0.10.2 From efd48ceacea78e4d4656aa0a6bf4c5b92ed22130 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Tue, 1 Jul 2014 16:53:13 +0100 Subject: arm64: KVM: export demux regids as KVM_REG_ARM64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I suspect this is a -ECUTPASTE fault from the initial implementation. If we don't declare the register ID to be KVM_REG_ARM64 the KVM_GET_ONE_REG implementation kvm_arm_get_reg() returns -EINVAL and hilarity ensues. The kvm/api.txt document describes all arm64 registers as starting with 0x60xx... (i.e KVM_REG_ARM64). 
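For illustration, a minimal userspace sketch of the call this unbreaks; the
helper name is made up and vcpu_fd is assumed to be an already-initialised KVM
vCPU descriptor. With the previously advertised encoding the ioctl came back
-EINVAL; with the KVM_REG_ARM64 encoding below it succeeds.

#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

/* Read the first CCSIDR demux register (index 0) from an arm64 vCPU. */
static int get_ccsidr0(int vcpu_fd, uint32_t *out)
{
	struct kvm_one_reg reg = {
		.id = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX |
		      KVM_REG_ARM_DEMUX_ID_CCSIDR | 0 /* index */,
		.addr = (uintptr_t)out,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}
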
Signed-off-by: Alex Bennée Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c59a1bd..34f25a5 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -962,7 +962,7 @@ static unsigned int num_demux_regs(void) static int write_demux_regids(u64 __user *uindices) { - u64 val = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX; + u64 val = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX; unsigned int i; val |= KVM_REG_ARM_DEMUX_ID_CCSIDR; -- cgit v0.10.2 From 1df08ba0aa95f1a8832b7162eec51069bd9be7ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Fri, 4 Jul 2014 15:54:14 +0100 Subject: arm64: KVM: allow export and import of generic timer regs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For correct guest suspend/resume behaviour we need to ensure we include the generic timer registers for 64 bit guests. As CONFIG_KVM_ARM_TIMER is always set for arm64 we don't need to worry about null implementations. However I have re-jigged the kvm_arm_timer_set/get_reg declarations to be in the common include/kvm/arm_arch_timer.h headers. Acked-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Alex Bennée Signed-off-by: Christoffer Dall diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 193ceaf..dc4e3ed 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -228,7 +228,4 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext) int kvm_perf_init(void); int kvm_perf_teardown(void); -u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); -int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); - #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index b23a59c..986e625 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -124,16 +124,6 @@ static bool is_timer_reg(u64 index) return false; } -int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) -{ - return 0; -} - -u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) -{ - return 0; -} - #else #define NUM_TIMER_REGS 3 diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 60b5c31..8d1ec28 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -136,13 +136,67 @@ static unsigned long num_core_regs(void) } /** + * ARM64 versions of the TIMER registers, always available on arm64 + */ + +#define NUM_TIMER_REGS 3 + +static bool is_timer_reg(u64 index) +{ + switch (index) { + case KVM_REG_ARM_TIMER_CTL: + case KVM_REG_ARM_TIMER_CNT: + case KVM_REG_ARM_TIMER_CVAL: + return true; + } + return false; +} + +static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + if (put_user(KVM_REG_ARM_TIMER_CTL, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CNT, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices)) + return -EFAULT; + + return 0; +} + +static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + int ret; + + ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); + if (ret != 0) + return ret; + + return kvm_arm_timer_set_reg(vcpu, reg->id, val); +} + +static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + + val = 
kvm_arm_timer_get_reg(vcpu, reg->id); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); +} + +/** * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG * * This is for all registers. */ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) { - return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu); + return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu) + + NUM_TIMER_REGS; } /** @@ -154,6 +208,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { unsigned int i; const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE; + int ret; for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) { if (put_user(core_reg | i, uindices)) @@ -161,6 +216,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) uindices++; } + ret = copy_timer_indices(vcpu, uindices); + if (ret) + return ret; + uindices += NUM_TIMER_REGS; + return kvm_arm_copy_sys_reg_indices(vcpu, uindices); } @@ -174,6 +234,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return get_core_reg(vcpu, reg); + if (is_timer_reg(reg->id)) + return get_timer_reg(vcpu, reg); + return kvm_arm_sys_reg_get_reg(vcpu, reg); } @@ -187,6 +250,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return set_core_reg(vcpu, reg); + if (is_timer_reg(reg->id)) + return set_timer_reg(vcpu, reg); + return kvm_arm_sys_reg_set_reg(vcpu, reg); } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 6d9aedd..ad9db60 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -67,6 +67,10 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); + +u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); +int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); + #else static inline int kvm_timer_hyp_init(void) { @@ -84,6 +88,16 @@ static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {} static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {} static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {} + +static inline int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) +{ + return 0; +} + +static inline u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) +{ + return 0; +} #endif #endif -- cgit v0.10.2 From 96f68023bfb359a508e7e106eae5e3904669a999 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jun 2013 11:57:07 +0100 Subject: arm64: GICv3 device tree binding documentation Add the necessary documentation to support GICv3. 
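As a non-normative aside, the optional properties documented below would
typically be consumed by an OF-based driver along these lines (the helper name
and the fallback defaults are illustrative assumptions, not mandated by the
binding):

#include <linux/of.h>
#include <linux/types.h>

static void gicv3_parse_optional(struct device_node *node,
				 u32 *nr_redist_regions, u64 *redist_stride)
{
	/* "#redistributor-regions" is only required when more than one region exists. */
	if (of_property_read_u32(node, "#redistributor-regions", nr_redist_regions))
		*nr_redist_regions = 1;

	/* "redistributor-stride" is only needed when padding pages are in use. */
	if (of_property_read_u64(node, "redistributor-stride", redist_stride))
		*redist_stride = 0;
}
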
Cc: Thomas Gleixner Cc: Mark Rutland Acked-by: Catalin Marinas Acked-by: Rob Herring Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/Documentation/devicetree/bindings/arm/gic-v3.txt b/Documentation/devicetree/bindings/arm/gic-v3.txt new file mode 100644 index 0000000..33cd05e --- /dev/null +++ b/Documentation/devicetree/bindings/arm/gic-v3.txt @@ -0,0 +1,79 @@ +* ARM Generic Interrupt Controller, version 3 + +AArch64 SMP cores are often associated with a GICv3, providing Private +Peripheral Interrupts (PPI), Shared Peripheral Interrupts (SPI), +Software Generated Interrupts (SGI), and Locality-specific Peripheral +Interrupts (LPI). + +Main node required properties: + +- compatible : should at least contain "arm,gic-v3". +- interrupt-controller : Identifies the node as an interrupt controller +- #interrupt-cells : Specifies the number of cells needed to encode an + interrupt source. Must be a single cell with a value of at least 3. + + The 1st cell is the interrupt type; 0 for SPI interrupts, 1 for PPI + interrupts. Other values are reserved for future use. + + The 2nd cell contains the interrupt number for the interrupt type. + SPI interrupts are in the range [0-987]. PPI interrupts are in the + range [0-15]. + + The 3rd cell is the flags, encoded as follows: + bits[3:0] trigger type and level flags. + 1 = edge triggered + 4 = level triggered + + Cells 4 and beyond are reserved for future use. When the 1st cell + has a value of 0 or 1, cells 4 and beyond act as padding, and may be + ignored. It is recommended that padding cells have a value of 0. + +- reg : Specifies base physical address(s) and size of the GIC + registers, in the following order: + - GIC Distributor interface (GICD) + - GIC Redistributors (GICR), one range per redistributor region + - GIC CPU interface (GICC) + - GIC Hypervisor interface (GICH) + - GIC Virtual CPU interface (GICV) + + GICC, GICH and GICV are optional. + +- interrupts : Interrupt source of the VGIC maintenance interrupt. + +Optional + +- redistributor-stride : If using padding pages, specifies the stride + of consecutive redistributors. Must be a multiple of 64kB. + +- #redistributor-regions: The number of independent contiguous regions + occupied by the redistributors. Required if more than one such + region is present. + +Examples: + + gic: interrupt-controller@2cf00000 { + compatible = "arm,gic-v3"; + #interrupt-cells = <3>; + interrupt-controller; + reg = <0x0 0x2f000000 0 0x10000>, // GICD + <0x0 0x2f100000 0 0x200000>, // GICR + <0x0 0x2c000000 0 0x2000>, // GICC + <0x0 0x2c010000 0 0x2000>, // GICH + <0x0 0x2c020000 0 0x2000>; // GICV + interrupts = <1 9 4>; + }; + + gic: interrupt-controller@2c010000 { + compatible = "arm,gic-v3"; + #interrupt-cells = <3>; + interrupt-controller; + redistributor-stride = <0x0 0x40000>; // 256kB stride + #redistributor-regions = <2>; + reg = <0x0 0x2c010000 0 0x10000>, // GICD + <0x0 0x2d000000 0 0x800000>, // GICR 1: CPUs 0-31 + <0x0 0x2e000000 0 0x800000>; // GICR 2: CPUs 32-63 + <0x0 0x2c040000 0 0x2000>, // GICC + <0x0 0x2c060000 0 0x2000>, // GICH + <0x0 0x2c080000 0 0x2000>; // GICV + interrupts = <1 9 4>; + }; -- cgit v0.10.2 From 63f8344cb4917e5219d07cfd6fcd50860bcf5360 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 28 Nov 2013 18:24:58 +0000 Subject: arm64: boot protocol documentation update for GICv3 Linux has some requirements that must be satisfied in order to boot on a system built with a GICv3. 
Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt index 37fc4f6..da1d4bf 100644 --- a/Documentation/arm64/booting.txt +++ b/Documentation/arm64/booting.txt @@ -141,6 +141,14 @@ Before jumping into the kernel, the following conditions must be met: the kernel image will be entered must be initialised by software at a higher exception level to prevent execution in an UNKNOWN state. + For systems with a GICv3 interrupt controller: + - If EL3 is present: + ICC_SRE_EL3.Enable (bit 3) must be initialiased to 0b1. + ICC_SRE_EL3.SRE (bit 0) must be initialised to 0b1. + - If the kernel is entered at EL1: + ICC.SRE_EL2.Enable (bit 3) must be initialised to 0b1 + ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b1. + The requirements described above for CPU mode, caches, MMUs, architected timers, coherency and system registers apply to all CPUs. All CPUs must enter the kernel in the same exception level. -- cgit v0.10.2 From eede821dbfd58df89edb072da64e006321eaef58 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 May 2013 10:20:36 +0100 Subject: KVM: arm/arm64: vgic: move GICv2 registers to their own structure In order to make way for the GICv3 registers, move the v2-specific registers to their own structure. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 85598b5..713e807 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -182,13 +182,13 @@ int main(void) DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.fault.hyp_pc)); #ifdef CONFIG_KVM_ARM_VGIC DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); - DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr)); - DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr)); - DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr)); - DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr)); - DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr)); - DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr)); - DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr)); + DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); + DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); + DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); + DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr)); + DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); + DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); + DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); #ifdef CONFIG_KVM_ARM_TIMER DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 76af9302..e4eaf30 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -421,14 +421,14 @@ vcpu .req r0 @ vcpu pointer always in r0 ldr r9, [r2, #GICH_ELRSR1] ldr r10, [r2, #GICH_APR] - str r3, [r11, #VGIC_CPU_HCR] - str r4, [r11, #VGIC_CPU_VMCR] - str r5, [r11, #VGIC_CPU_MISR] - str r6, [r11, #VGIC_CPU_EISR] - str r7, [r11, #(VGIC_CPU_EISR + 4)] - str r8, [r11, #VGIC_CPU_ELRSR] - str r9, [r11, #(VGIC_CPU_ELRSR + 4)] - str r10, [r11, #VGIC_CPU_APR] + str r3, [r11, #VGIC_V2_CPU_HCR] + str r4, [r11, #VGIC_V2_CPU_VMCR] + str r5, [r11, #VGIC_V2_CPU_MISR] + 
str r6, [r11, #VGIC_V2_CPU_EISR] + str r7, [r11, #(VGIC_V2_CPU_EISR + 4)] + str r8, [r11, #VGIC_V2_CPU_ELRSR] + str r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)] + str r10, [r11, #VGIC_V2_CPU_APR] /* Clear GICH_HCR */ mov r5, #0 @@ -436,7 +436,7 @@ vcpu .req r0 @ vcpu pointer always in r0 /* Save list registers */ add r2, r2, #GICH_LR0 - add r3, r11, #VGIC_CPU_LR + add r3, r11, #VGIC_V2_CPU_LR ldr r4, [r11, #VGIC_CPU_NR_LR] 1: ldr r6, [r2], #4 str r6, [r3], #4 @@ -463,9 +463,9 @@ vcpu .req r0 @ vcpu pointer always in r0 add r11, vcpu, #VCPU_VGIC_CPU /* We only restore a minimal set of registers */ - ldr r3, [r11, #VGIC_CPU_HCR] - ldr r4, [r11, #VGIC_CPU_VMCR] - ldr r8, [r11, #VGIC_CPU_APR] + ldr r3, [r11, #VGIC_V2_CPU_HCR] + ldr r4, [r11, #VGIC_V2_CPU_VMCR] + ldr r8, [r11, #VGIC_V2_CPU_APR] str r3, [r2, #GICH_HCR] str r4, [r2, #GICH_VMCR] @@ -473,7 +473,7 @@ vcpu .req r0 @ vcpu pointer always in r0 /* Restore list registers */ add r2, r2, #GICH_LR0 - add r3, r11, #VGIC_CPU_LR + add r3, r11, #VGIC_V2_CPU_LR ldr r4, [r11, #VGIC_CPU_NR_LR] 1: ldr r6, [r3], #4 str r6, [r2], #4 diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 646f888..20fd488 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -129,13 +129,13 @@ int main(void) DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); - DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr)); - DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr)); - DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr)); - DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr)); - DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr)); - DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr)); - DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr)); + DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); + DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); + DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); + DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr)); + DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); + DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); + DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index b0d1512..877d82a1 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -412,14 +412,14 @@ CPU_BE( rev w9, w9 ) CPU_BE( rev w10, w10 ) CPU_BE( rev w11, w11 ) - str w4, [x3, #VGIC_CPU_HCR] - str w5, [x3, #VGIC_CPU_VMCR] - str w6, [x3, #VGIC_CPU_MISR] - str w7, [x3, #VGIC_CPU_EISR] - str w8, [x3, #(VGIC_CPU_EISR + 4)] - str w9, [x3, #VGIC_CPU_ELRSR] - str w10, [x3, #(VGIC_CPU_ELRSR + 4)] - str w11, [x3, #VGIC_CPU_APR] + str w4, [x3, #VGIC_V2_CPU_HCR] + str w5, [x3, #VGIC_V2_CPU_VMCR] + str w6, [x3, #VGIC_V2_CPU_MISR] + str w7, [x3, #VGIC_V2_CPU_EISR] + str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] + str w9, [x3, #VGIC_V2_CPU_ELRSR] + str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] + str w11, [x3, #VGIC_V2_CPU_APR] /* Clear GICH_HCR */ str wzr, [x2, #GICH_HCR] @@ -427,7 +427,7 @@ CPU_BE( rev w11, w11 ) /* Save list registers */ add x2, x2, #GICH_LR0 ldr w4, [x3, 
#VGIC_CPU_NR_LR] - add x3, x3, #VGIC_CPU_LR + add x3, x3, #VGIC_V2_CPU_LR 1: ldr w5, [x2], #4 CPU_BE( rev w5, w5 ) str w5, [x3], #4 @@ -452,9 +452,9 @@ CPU_BE( rev w5, w5 ) add x3, x0, #VCPU_VGIC_CPU /* We only restore a minimal set of registers */ - ldr w4, [x3, #VGIC_CPU_HCR] - ldr w5, [x3, #VGIC_CPU_VMCR] - ldr w6, [x3, #VGIC_CPU_APR] + ldr w4, [x3, #VGIC_V2_CPU_HCR] + ldr w5, [x3, #VGIC_V2_CPU_VMCR] + ldr w6, [x3, #VGIC_V2_CPU_APR] CPU_BE( rev w4, w4 ) CPU_BE( rev w5, w5 ) CPU_BE( rev w6, w6 ) @@ -466,7 +466,7 @@ CPU_BE( rev w6, w6 ) /* Restore list registers */ add x2, x2, #GICH_LR0 ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_CPU_LR + add x3, x3, #VGIC_V2_CPU_LR 1: ldr w5, [x3], #4 CPU_BE( rev w5, w5 ) str w5, [x2], #4 diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f27000f..f738e5a 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -110,6 +110,16 @@ struct vgic_dist { #endif }; +struct vgic_v2_cpu_if { + u32 vgic_hcr; + u32 vgic_vmcr; + u32 vgic_misr; /* Saved only */ + u32 vgic_eisr[2]; /* Saved only */ + u32 vgic_elrsr[2]; /* Saved only */ + u32 vgic_apr; + u32 vgic_lr[VGIC_MAX_LRS]; +}; + struct vgic_cpu { #ifdef CONFIG_KVM_ARM_VGIC /* per IRQ to LR mapping */ @@ -126,13 +136,9 @@ struct vgic_cpu { int nr_lr; /* CPU vif control registers for world switch */ - u32 vgic_hcr; - u32 vgic_vmcr; - u32 vgic_misr; /* Saved only */ - u32 vgic_eisr[2]; /* Saved only */ - u32 vgic_elrsr[2]; /* Saved only */ - u32 vgic_apr; - u32 vgic_lr[VGIC_MAX_LRS]; + union { + struct vgic_v2_cpu_if vgic_v2; + }; #endif }; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 56ff9be..0ba1ab0 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -601,7 +601,7 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu) { clear_bit(lr_nr, vgic_cpu->lr_used); - vgic_cpu->vgic_lr[lr_nr] &= ~GICH_LR_STATE; + vgic_cpu->vgic_v2.vgic_lr[lr_nr] &= ~GICH_LR_STATE; vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; } @@ -626,7 +626,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) u32 *lr; for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { - lr = &vgic_cpu->vgic_lr[i]; + lr = &vgic_cpu->vgic_v2.vgic_lr[i]; irq = LR_IRQID(*lr); source_cpu = LR_CPUID(*lr); @@ -1007,7 +1007,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) int lr; for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) { - int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; + int irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; if (!vgic_irq_is_enabled(vcpu, irq)) { vgic_retire_lr(lr, irq, vgic_cpu); @@ -1037,11 +1037,11 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) /* Do we have an active interrupt for the same CPUID? 
*/ if (lr != LR_EMPTY && - (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) { + (LR_CPUID(vgic_cpu->vgic_v2.vgic_lr[lr]) == sgi_source_id)) { kvm_debug("LR%d piggyback for IRQ%d %x\n", - lr, irq, vgic_cpu->vgic_lr[lr]); + lr, irq, vgic_cpu->vgic_v2.vgic_lr[lr]); BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); - vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT; + vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_PENDING_BIT; return true; } @@ -1052,12 +1052,12 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) return false; kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); - vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); + vgic_cpu->vgic_v2.vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); vgic_cpu->vgic_irq_lr_map[irq] = lr; set_bit(lr, vgic_cpu->lr_used); if (!vgic_irq_is_edge(vcpu, irq)) - vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI; + vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_EOI; return true; } @@ -1155,9 +1155,9 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) epilog: if (overflow) { - vgic_cpu->vgic_hcr |= GICH_HCR_UIE; + vgic_cpu->vgic_v2.vgic_hcr |= GICH_HCR_UIE; } else { - vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; + vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; /* * We're about to run this VCPU, and we've consumed * everything the distributor had in store for @@ -1173,21 +1173,21 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; bool level_pending = false; - kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr); + kvm_debug("MISR = %08x\n", vgic_cpu->vgic_v2.vgic_misr); - if (vgic_cpu->vgic_misr & GICH_MISR_EOI) { + if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_EOI) { /* * Some level interrupts have been EOIed. Clear their * active bit. */ int lr, irq; - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr, + for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_eisr, vgic_cpu->nr_lr) { - irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; + irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; vgic_irq_clear_active(vcpu, irq); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI; + vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_EOI; /* Any additional pending interrupt? */ if (vgic_dist_irq_is_pending(vcpu, irq)) { @@ -1201,13 +1201,13 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * Despite being EOIed, the LR may not have * been marked as empty. */ - set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT; + set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr); + vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT; } } - if (vgic_cpu->vgic_misr & GICH_MISR_U) - vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; + if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_U) + vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; return level_pending; } @@ -1226,21 +1226,21 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) level_pending = vgic_process_maintenance(vcpu); /* Clear mappings for empty LRs */ - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr, + for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, vgic_cpu->nr_lr) { int irq; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) continue; - irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; + irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; BUG_ON(irq >= VGIC_NR_IRQS); vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; } /* Check if we still have something up our sleeve... 
*/ - pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr, + pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, vgic_cpu->nr_lr); if (level_pending || pending < vgic_cpu->nr_lr) set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); @@ -1436,10 +1436,10 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) * points to their reset values. Anything else resets to zero * anyway. */ - vgic_cpu->vgic_vmcr = 0; + vgic_cpu->vgic_v2.vgic_vmcr = 0; vgic_cpu->nr_lr = vgic_nr_lr; - vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */ + vgic_cpu->vgic_v2.vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */ return 0; } @@ -1746,15 +1746,15 @@ static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, } if (!mmio->is_write) { - reg = (vgic_cpu->vgic_vmcr & mask) >> shift; + reg = (vgic_cpu->vgic_v2.vgic_vmcr & mask) >> shift; mmio_data_write(mmio, ~0, reg); } else { reg = mmio_data_read(mmio, ~0); reg = (reg << shift) & mask; - if (reg != (vgic_cpu->vgic_vmcr & mask)) + if (reg != (vgic_cpu->vgic_v2.vgic_vmcr & mask)) updated = true; - vgic_cpu->vgic_vmcr &= ~mask; - vgic_cpu->vgic_vmcr |= reg; + vgic_cpu->vgic_v2.vgic_vmcr &= ~mask; + vgic_cpu->vgic_v2.vgic_vmcr |= reg; } return updated; } -- cgit v0.10.2 From 8d5c6b06a5d5f8ebcf40558e566781d572920740 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 3 Jun 2013 15:55:02 +0100 Subject: KVM: ARM: vgic: introduce vgic_ops and LR manipulation primitives In order to split the various register manipulation from the main vgic code, introduce a vgic_ops structure, and start by abstracting the LR manipulation code with a couple of accessors. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f738e5a..17bbe51 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -68,6 +68,24 @@ struct vgic_bytemap { u32 shared[VGIC_NR_SHARED_IRQS / 4]; }; +struct kvm_vcpu; + +#define LR_STATE_PENDING (1 << 0) +#define LR_STATE_ACTIVE (1 << 1) +#define LR_STATE_MASK (3 << 0) +#define LR_EOI_INT (1 << 2) + +struct vgic_lr { + u16 irq; + u8 source; + u8 state; +}; + +struct vgic_ops { + struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int); + void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr); +}; + struct vgic_dist { #ifdef CONFIG_KVM_ARM_VGIC spinlock_t lock; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 0ba1ab0..11408fe 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -94,9 +94,12 @@ static struct device_node *vgic_node; #define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); +static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); static void vgic_update_state(struct kvm *kvm); static void vgic_kick_vcpus(struct kvm *kvm); static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); +static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); +static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); static u32 vgic_nr_lr; static unsigned int vgic_maint_irq; @@ -593,18 +596,6 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, return false; } -#define LR_CPUID(lr) \ - (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT) -#define LR_IRQID(lr) \ - ((lr) & GICH_LR_VIRTUALID) - -static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu) -{ - clear_bit(lr_nr, vgic_cpu->lr_used); - vgic_cpu->vgic_v2.vgic_lr[lr_nr] &= ~GICH_LR_STATE; - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; -} - 
/** * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs @@ -622,13 +613,10 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; int vcpu_id = vcpu->vcpu_id; - int i, irq, source_cpu; - u32 *lr; + int i; for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { - lr = &vgic_cpu->vgic_v2.vgic_lr[i]; - irq = LR_IRQID(*lr); - source_cpu = LR_CPUID(*lr); + struct vgic_lr lr = vgic_get_lr(vcpu, i); /* * There are three options for the state bits: @@ -640,7 +628,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * If the LR holds only an active interrupt (not pending) then * just leave it alone. */ - if ((*lr & GICH_LR_STATE) == GICH_LR_ACTIVE_BIT) + if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE) continue; /* @@ -649,18 +637,19 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * is fine, then we are only setting a few bits that were * already set. */ - vgic_dist_irq_set(vcpu, irq); - if (irq < VGIC_NR_SGIS) - dist->irq_sgi_sources[vcpu_id][irq] |= 1 << source_cpu; - *lr &= ~GICH_LR_PENDING_BIT; + vgic_dist_irq_set(vcpu, lr.irq); + if (lr.irq < VGIC_NR_SGIS) + dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source; + lr.state &= ~LR_STATE_PENDING; + vgic_set_lr(vcpu, i, lr); /* * If there's no state left on the LR (it could still be * active), then the LR does not hold any useful info and can * be marked as free for other use. */ - if (!(*lr & GICH_LR_STATE)) - vgic_retire_lr(i, irq, vgic_cpu); + if (!(lr.state & LR_STATE_MASK)) + vgic_retire_lr(i, lr.irq, vcpu); /* Finally update the VGIC state. */ vgic_update_state(vcpu->kvm); @@ -989,8 +978,69 @@ static void vgic_update_state(struct kvm *kvm) } } -#define MK_LR_PEND(src, irq) \ - (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq)) +static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr) +{ + struct vgic_lr lr_desc; + u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr]; + + lr_desc.irq = val & GICH_LR_VIRTUALID; + if (lr_desc.irq <= 15) + lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; + else + lr_desc.source = 0; + lr_desc.state = 0; + + if (val & GICH_LR_PENDING_BIT) + lr_desc.state |= LR_STATE_PENDING; + if (val & GICH_LR_ACTIVE_BIT) + lr_desc.state |= LR_STATE_ACTIVE; + if (val & GICH_LR_EOI) + lr_desc.state |= LR_EOI_INT; + + return lr_desc; +} + +static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq; + + if (lr_desc.state & LR_STATE_PENDING) + lr_val |= GICH_LR_PENDING_BIT; + if (lr_desc.state & LR_STATE_ACTIVE) + lr_val |= GICH_LR_ACTIVE_BIT; + if (lr_desc.state & LR_EOI_INT) + lr_val |= GICH_LR_EOI; + + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; +} + +static const struct vgic_ops vgic_ops = { + .get_lr = vgic_v2_get_lr, + .set_lr = vgic_v2_set_lr, +}; + +static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) +{ + return vgic_ops.get_lr(vcpu, lr); +} + +static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr vlr) +{ + vgic_ops.set_lr(vcpu, lr, vlr); +} + +static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); + + vlr.state = 0; + vgic_set_lr(vcpu, lr_nr, vlr); + clear_bit(lr_nr, vgic_cpu->lr_used); + vgic_cpu->vgic_irq_lr_map[irq] = 
LR_EMPTY; +} /* * An interrupt may have been disabled after being made pending on the @@ -1007,12 +1057,12 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) int lr; for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) { - int irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; + struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - if (!vgic_irq_is_enabled(vcpu, irq)) { - vgic_retire_lr(lr, irq, vgic_cpu); - if (vgic_irq_is_active(vcpu, irq)) - vgic_irq_clear_active(vcpu, irq); + if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { + vgic_retire_lr(lr, vlr.irq, vcpu); + if (vgic_irq_is_active(vcpu, vlr.irq)) + vgic_irq_clear_active(vcpu, vlr.irq); } } } @@ -1024,6 +1074,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_lr vlr; int lr; /* Sanitize the input... */ @@ -1036,13 +1087,15 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) lr = vgic_cpu->vgic_irq_lr_map[irq]; /* Do we have an active interrupt for the same CPUID? */ - if (lr != LR_EMPTY && - (LR_CPUID(vgic_cpu->vgic_v2.vgic_lr[lr]) == sgi_source_id)) { - kvm_debug("LR%d piggyback for IRQ%d %x\n", - lr, irq, vgic_cpu->vgic_v2.vgic_lr[lr]); - BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); - vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_PENDING_BIT; - return true; + if (lr != LR_EMPTY) { + vlr = vgic_get_lr(vcpu, lr); + if (vlr.source == sgi_source_id) { + kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); + BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); + vlr.state |= LR_STATE_PENDING; + vgic_set_lr(vcpu, lr, vlr); + return true; + } } /* Try to use another LR for this interrupt */ @@ -1052,12 +1105,16 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) return false; kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); - vgic_cpu->vgic_v2.vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); vgic_cpu->vgic_irq_lr_map[irq] = lr; set_bit(lr, vgic_cpu->lr_used); + vlr.irq = irq; + vlr.source = sgi_source_id; + vlr.state = LR_STATE_PENDING; if (!vgic_irq_is_edge(vcpu, irq)) - vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_EOI; + vlr.state |= LR_EOI_INT; + + vgic_set_lr(vcpu, lr, vlr); return true; } @@ -1180,21 +1237,23 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * Some level interrupts have been EOIed. Clear their * active bit. */ - int lr, irq; + int lr; for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_eisr, vgic_cpu->nr_lr) { - irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; + struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - vgic_irq_clear_active(vcpu, irq); - vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_EOI; + vgic_irq_clear_active(vcpu, vlr.irq); + WARN_ON(vlr.state & LR_STATE_MASK); + vlr.state = 0; + vgic_set_lr(vcpu, lr, vlr); /* Any additional pending interrupt? */ - if (vgic_dist_irq_is_pending(vcpu, irq)) { - vgic_cpu_irq_set(vcpu, irq); + if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) { + vgic_cpu_irq_set(vcpu, vlr.irq); level_pending = true; } else { - vgic_cpu_irq_clear(vcpu, irq); + vgic_cpu_irq_clear(vcpu, vlr.irq); } /* @@ -1202,7 +1261,6 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * been marked as empty. 
*/ set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr); - vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT; } } @@ -1228,15 +1286,15 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) /* Clear mappings for empty LRs */ for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, vgic_cpu->nr_lr) { - int irq; + struct vgic_lr vlr; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) continue; - irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; + vlr = vgic_get_lr(vcpu, lr); - BUG_ON(irq >= VGIC_NR_IRQS); - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; + BUG_ON(vlr.irq >= VGIC_NR_IRQS); + vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; } /* Check if we still have something up our sleeve... */ -- cgit v0.10.2 From 69bb2c9fbc11d9d4358fbb798db15c9092eb4d8c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 10:29:39 +0100 Subject: KVM: ARM: vgic: abstract access to the ELRSR bitmap Move the GICH_ELRSR access to its own functions, and add them to the vgic_ops structure. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 17bbe51..38864f5e 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -84,6 +84,8 @@ struct vgic_lr { struct vgic_ops { struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int); void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr); + void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr); + u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); }; struct vgic_dist { diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 11408fe..6dcc974 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1015,9 +1015,32 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; } +static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + if (!(lr_desc.state & LR_STATE_MASK)) + set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr); +} + +static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) +{ + u64 val; + +#if BITS_PER_LONG == 64 + val = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1]; + val <<= 32; + val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0]; +#else + val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr; +#endif + return val; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, + .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, + .get_elrsr = vgic_v2_get_elrsr, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1031,6 +1054,17 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, vgic_ops.set_lr(vcpu, lr, vlr); } +static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr vlr) +{ + vgic_ops.sync_lr_elrsr(vcpu, lr, vlr); +} + +static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) +{ + return vgic_ops.get_elrsr(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1260,7 +1294,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * Despite being EOIed, the LR may not have * been marked as empty. 
*/ - set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr); + vgic_sync_lr_elrsr(vcpu, lr, vlr); } } @@ -1278,14 +1312,17 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + u64 elrsr; + unsigned long *elrsr_ptr; int lr, pending; bool level_pending; level_pending = vgic_process_maintenance(vcpu); + elrsr = vgic_get_elrsr(vcpu); + elrsr_ptr = (unsigned long *)&elrsr; /* Clear mappings for empty LRs */ - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, - vgic_cpu->nr_lr) { + for_each_set_bit(lr, elrsr_ptr, vgic_cpu->nr_lr) { struct vgic_lr vlr; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) @@ -1298,8 +1335,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) } /* Check if we still have something up our sleeve... */ - pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, - vgic_cpu->nr_lr); + pending = find_first_zero_bit(elrsr_ptr, vgic_cpu->nr_lr); if (level_pending || pending < vgic_cpu->nr_lr) set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); } -- cgit v0.10.2 From 8d6a0313c125c3c7b208b75695fe6ab00afab4c5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 10:33:43 +0100 Subject: KVM: ARM: vgic: abstract EISR bitmap access Move the GICH_EISR access to its own function. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 38864f5e..ccb9b59 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -86,6 +86,7 @@ struct vgic_ops { void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr); void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr); u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); + u64 (*get_eisr)(const struct kvm_vcpu *vcpu); }; struct vgic_dist { diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 6dcc974..1e857e6 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1036,11 +1036,26 @@ static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) return val; } +static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) +{ + u64 val; + +#if BITS_PER_LONG == 64 + val = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1]; + val <<= 32; + val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0]; +#else + val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr; +#endif + return val; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, .get_elrsr = vgic_v2_get_elrsr, + .get_eisr = vgic_v2_get_eisr, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1065,6 +1080,11 @@ static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) return vgic_ops.get_elrsr(vcpu); } +static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu) +{ + return vgic_ops.get_eisr(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1271,10 +1291,11 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * Some level interrupts have been EOIed. Clear their * active bit. 
*/ + u64 eisr = vgic_get_eisr(vcpu); + unsigned long *eisr_ptr = (unsigned long *)&eisr; int lr; - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_eisr, - vgic_cpu->nr_lr) { + for_each_set_bit(lr, eisr_ptr, vgic_cpu->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); vgic_irq_clear_active(vcpu, vlr.irq); -- cgit v0.10.2 From 495dd859f304689a7cd5ef413c439cb090dc25e6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 11:02:10 +0100 Subject: KVM: ARM: vgic: abstract MISR decoding Instead of directly dealing with the GICH_MISR bits, move the code to its own function and use a couple of public flags to represent the actual state. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index ccb9b59..4857508 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -87,6 +87,7 @@ struct vgic_ops { void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr); u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); u64 (*get_eisr)(const struct kvm_vcpu *vcpu); + u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu); }; struct vgic_dist { @@ -165,6 +166,9 @@ struct vgic_cpu { #define LR_EMPTY 0xff +#define INT_STATUS_EOI (1 << 0) +#define INT_STATUS_UNDERFLOW (1 << 1) + struct kvm; struct kvm_vcpu; struct kvm_run; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 1e857e6..c0bcc97 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1050,12 +1050,26 @@ static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) return val; } +static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) +{ + u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr; + u32 ret = 0; + + if (misr & GICH_MISR_EOI) + ret |= INT_STATUS_EOI; + if (misr & GICH_MISR_U) + ret |= INT_STATUS_UNDERFLOW; + + return ret; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, .get_elrsr = vgic_v2_get_elrsr, .get_eisr = vgic_v2_get_eisr, + .get_interrupt_status = vgic_v2_get_interrupt_status, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1085,6 +1099,11 @@ static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu) return vgic_ops.get_eisr(vcpu); } +static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu) +{ + return vgic_ops.get_interrupt_status(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1282,11 +1301,12 @@ epilog: static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u32 status = vgic_get_interrupt_status(vcpu); bool level_pending = false; - kvm_debug("MISR = %08x\n", vgic_cpu->vgic_v2.vgic_misr); + kvm_debug("STATUS = %08x\n", status); - if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_EOI) { + if (status & INT_STATUS_EOI) { /* * Some level interrupts have been EOIed. Clear their * active bit. 
@@ -1319,7 +1339,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) } } - if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_U) + if (status & INT_STATUS_UNDERFLOW) vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; return level_pending; -- cgit v0.10.2 From 909d9b5025f149af6cfc304a76ad6218e6622cc0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 11:24:17 +0100 Subject: KVM: ARM: vgic: move underflow handling to vgic_ops Move the code dealing with LR underflow handling to its own functions, and make them accessible through vgic_ops. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 4857508..cdfa5d9 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -88,6 +88,8 @@ struct vgic_ops { u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); u64 (*get_eisr)(const struct kvm_vcpu *vcpu); u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu); + void (*enable_underflow)(struct kvm_vcpu *vcpu); + void (*disable_underflow)(struct kvm_vcpu *vcpu); }; struct vgic_dist { diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index c0bcc97..6d618e0 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1063,6 +1063,16 @@ static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) return ret; } +static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE; +} + +static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, @@ -1070,6 +1080,8 @@ static const struct vgic_ops vgic_ops = { .get_elrsr = vgic_v2_get_elrsr, .get_eisr = vgic_v2_get_eisr, .get_interrupt_status = vgic_v2_get_interrupt_status, + .enable_underflow = vgic_v2_enable_underflow, + .disable_underflow = vgic_v2_disable_underflow, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1104,6 +1116,16 @@ static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu) { return vgic_ops.get_interrupt_status(vcpu); } +static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu) +{ + vgic_ops.enable_underflow(vcpu); +} + +static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu) +{ + vgic_ops.disable_underflow(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1285,9 +1307,9 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) epilog: if (overflow) { - vgic_cpu->vgic_v2.vgic_hcr |= GICH_HCR_UIE; + vgic_enable_underflow(vcpu); } else { - vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; + vgic_disable_underflow(vcpu); /* * We're about to run this VCPU, and we've consumed * everything the distributor had in store for @@ -1340,7 +1362,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) } if (status & INT_STATUS_UNDERFLOW) - vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; + vgic_disable_underflow(vcpu); return level_pending; } -- cgit v0.10.2 From beee38b9d0c0ea6cf2a7f35c3108f7d8281d4545 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Feb 2014 17:48:10 +0000 Subject: KVM: ARM: vgic: abstract VMCR access Instead of directly messing with the GICH_VMCR bits for the CPU interface save/restore code, add accessors that encode/decode the entire set of registers exposed by VMCR.
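As a rough usage sketch (an editorial illustration, not taken from the patch; "new_pmr" is a made-up value), a caller inside vgic.c can then do a read-modify-write of a single field without knowing the GICH_VMCR bit layout:

    struct vgic_vmcr vmcr;

    vgic_get_vmcr(vcpu, &vmcr);    /* decode GICH_VMCR into named fields */
    vmcr.pmr = new_pmr;            /* touch only the priority mask field */
    vgic_set_vmcr(vcpu, &vmcr);    /* re-encode and store the register   */

This is the same pattern the reworked handle_cpu_mmio_misc() below uses for GIC_CPU_CTRL, GIC_CPU_PRIMASK and friends.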
Not the most efficient thing, but given that this code is only used by the save/restore code, performance is far from being critical. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index cdfa5d9..f515800 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -81,6 +81,13 @@ struct vgic_lr { u8 state; }; +struct vgic_vmcr { + u32 ctlr; + u32 abpr; + u32 bpr; + u32 pmr; +}; + struct vgic_ops { struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int); void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr); @@ -90,6 +97,8 @@ struct vgic_ops { u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu); void (*enable_underflow)(struct kvm_vcpu *vcpu); void (*disable_underflow)(struct kvm_vcpu *vcpu); + void (*get_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); + void (*set_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); }; struct vgic_dist { diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 6d618e0..5c70639 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -100,8 +100,10 @@ static void vgic_kick_vcpus(struct kvm *kvm); static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); -static u32 vgic_nr_lr; +static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +static u32 vgic_nr_lr; static unsigned int vgic_maint_irq; static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, @@ -1073,6 +1075,28 @@ static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu) vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; } +static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr; + + vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT; + vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT; + vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT; + vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT; +} + +static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr; + + vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK; + vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK; + vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK; + vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; + + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, @@ -1082,6 +1106,8 @@ static const struct vgic_ops vgic_ops = { .get_interrupt_status = vgic_v2_get_interrupt_status, .enable_underflow = vgic_v2_enable_underflow, .disable_underflow = vgic_v2_disable_underflow, + .get_vmcr = vgic_v2_get_vmcr, + .set_vmcr = vgic_v2_set_vmcr, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1126,6 +1152,16 @@ static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu) vgic_ops.disable_underflow(vcpu); } +static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + vgic_ops.get_vmcr(vcpu, vmcr); +} + +static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + vgic_ops.set_vmcr(vcpu, vmcr); +} + static void vgic_retire_lr(int lr_nr, int 
irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1879,39 +1915,40 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - u32 reg, mask = 0, shift = 0; bool updated = false; + struct vgic_vmcr vmcr; + u32 *vmcr_field; + u32 reg; + + vgic_get_vmcr(vcpu, &vmcr); switch (offset & ~0x3) { case GIC_CPU_CTRL: - mask = GICH_VMCR_CTRL_MASK; - shift = GICH_VMCR_CTRL_SHIFT; + vmcr_field = &vmcr.ctlr; break; case GIC_CPU_PRIMASK: - mask = GICH_VMCR_PRIMASK_MASK; - shift = GICH_VMCR_PRIMASK_SHIFT; + vmcr_field = &vmcr.pmr; break; case GIC_CPU_BINPOINT: - mask = GICH_VMCR_BINPOINT_MASK; - shift = GICH_VMCR_BINPOINT_SHIFT; + vmcr_field = &vmcr.bpr; break; case GIC_CPU_ALIAS_BINPOINT: - mask = GICH_VMCR_ALIAS_BINPOINT_MASK; - shift = GICH_VMCR_ALIAS_BINPOINT_SHIFT; + vmcr_field = &vmcr.abpr; break; + default: + BUG(); } if (!mmio->is_write) { - reg = (vgic_cpu->vgic_v2.vgic_vmcr & mask) >> shift; + reg = *vmcr_field; mmio_data_write(mmio, ~0, reg); } else { reg = mmio_data_read(mmio, ~0); - reg = (reg << shift) & mask; - if (reg != (vgic_cpu->vgic_v2.vgic_vmcr & mask)) + if (reg != *vmcr_field) { + *vmcr_field = reg; + vgic_set_vmcr(vcpu, &vmcr); updated = true; - vgic_cpu->vgic_v2.vgic_vmcr &= ~mask; - vgic_cpu->vgic_v2.vgic_vmcr |= reg; + } } return updated; } -- cgit v0.10.2 From da8dafd1777cdd93091207952297d221a88e6479 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 11:36:38 +0100 Subject: KVM: ARM: vgic: introduce vgic_enable Move the code dealing with enabling the VGIC on to vgic_ops. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f515800..2228973 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -99,6 +99,7 @@ struct vgic_ops { void (*disable_underflow)(struct kvm_vcpu *vcpu); void (*get_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void (*set_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); + void (*enable)(struct kvm_vcpu *vcpu); }; struct vgic_dist { diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 5c70639..70f674b 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1097,6 +1097,19 @@ static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; } +static void vgic_v2_enable(struct kvm_vcpu *vcpu) +{ + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. Anything else resets to zero + * anyway. + */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; + + /* Get the show on the road... 
*/ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, @@ -1108,6 +1121,7 @@ static const struct vgic_ops vgic_ops = { .disable_underflow = vgic_v2_disable_underflow, .get_vmcr = vgic_v2_get_vmcr, .set_vmcr = vgic_v2_set_vmcr, + .enable = vgic_v2_enable, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1162,6 +1176,11 @@ static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) vgic_ops.set_vmcr(vcpu, vmcr); } +static inline void vgic_enable(struct kvm_vcpu *vcpu) +{ + vgic_ops.enable(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1624,15 +1643,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY; } - /* - * By forcing VMCR to zero, the GIC will restore the binary - * points to their reset values. Anything else resets to zero - * anyway. - */ - vgic_cpu->vgic_v2.vgic_vmcr = 0; - vgic_cpu->nr_lr = vgic_nr_lr; - vgic_cpu->vgic_v2.vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */ + + vgic_enable(vcpu); return 0; } -- cgit v0.10.2 From ca85f623e37d096206e092ef037a145a60fa7f85 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 18 Jun 2013 19:17:28 +0100 Subject: KVM: ARM: introduce vgic_params structure Move all the data specific to a given GIC implementation into its own little structure. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 2228973..ce2e142 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -102,6 +102,17 @@ struct vgic_ops { void (*enable)(struct kvm_vcpu *vcpu); }; +struct vgic_params { + /* Physical address of vgic virtual cpu interface */ + phys_addr_t vcpu_base; + /* Number of list registers */ + u32 nr_lr; + /* Interrupt number */ + unsigned int maint_irq; + /* Virtual control interface base address */ + void __iomem *vctrl_base; +}; + struct vgic_dist { #ifdef CONFIG_KVM_ARM_VGIC spinlock_t lock; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 70f674b..f3a996d 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -76,14 +76,6 @@ #define IMPLEMENTER_ARM 0x43b #define GICC_ARCH_VERSION_V2 0x2 -/* Physical address of vgic virtual cpu interface */ -static phys_addr_t vgic_vcpu_base; - -/* Virtual control interface base address */ -static void __iomem *vgic_vctrl_base; - -static struct device_node *vgic_node; - #define ACCESS_READ_VALUE (1 << 0) #define ACCESS_READ_RAZ (0 << 0) #define ACCESS_READ_MASK(x) ((x) & (1 << 0)) @@ -103,8 +95,7 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); -static u32 vgic_nr_lr; -static unsigned int vgic_maint_irq; +static struct vgic_params vgic; static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) @@ -1206,7 +1197,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; int lr; - for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) { + for_each_set_bit(lr, vgic_cpu->lr_used, vgic.nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { @@ -1250,8 +1241,8 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, 
int irq) /* Try to use another LR for this interrupt */ lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, - vgic_cpu->nr_lr); - if (lr >= vgic_cpu->nr_lr) + vgic.nr_lr); + if (lr >= vgic.nr_lr) return false; kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); @@ -1377,7 +1368,6 @@ epilog: static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; u32 status = vgic_get_interrupt_status(vcpu); bool level_pending = false; @@ -1392,7 +1382,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) unsigned long *eisr_ptr = (unsigned long *)&eisr; int lr; - for_each_set_bit(lr, eisr_ptr, vgic_cpu->nr_lr) { + for_each_set_bit(lr, eisr_ptr, vgic.nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); vgic_irq_clear_active(vcpu, vlr.irq); @@ -1440,7 +1430,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) elrsr_ptr = (unsigned long *)&elrsr; /* Clear mappings for empty LRs */ - for_each_set_bit(lr, elrsr_ptr, vgic_cpu->nr_lr) { + for_each_set_bit(lr, elrsr_ptr, vgic.nr_lr) { struct vgic_lr vlr; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) @@ -1453,8 +1443,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) } /* Check if we still have something up our sleeve... */ - pending = find_first_zero_bit(elrsr_ptr, vgic_cpu->nr_lr); - if (level_pending || pending < vgic_cpu->nr_lr) + pending = find_first_zero_bit(elrsr_ptr, vgic.nr_lr); + if (level_pending || pending < vgic.nr_lr) set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); } @@ -1643,7 +1633,12 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY; } - vgic_cpu->nr_lr = vgic_nr_lr; + /* + * Store the number of LRs per vcpu, so we don't have to go + * all the way to the distributor structure to find out. Only + * assembly code should use this one. 
+ */ + vgic_cpu->nr_lr = vgic.nr_lr; vgic_enable(vcpu); @@ -1652,7 +1647,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) static void vgic_init_maintenance_interrupt(void *info) { - enable_percpu_irq(vgic_maint_irq, 0); + enable_percpu_irq(vgic.maint_irq, 0); } static int vgic_cpu_notify(struct notifier_block *self, @@ -1665,7 +1660,7 @@ static int vgic_cpu_notify(struct notifier_block *self, break; case CPU_DYING: case CPU_DYING_FROZEN: - disable_percpu_irq(vgic_maint_irq); + disable_percpu_irq(vgic.maint_irq); break; } @@ -1681,6 +1676,7 @@ int kvm_vgic_hyp_init(void) int ret; struct resource vctrl_res; struct resource vcpu_res; + struct device_node *vgic_node; vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic"); if (!vgic_node) { @@ -1688,17 +1684,17 @@ int kvm_vgic_hyp_init(void) return -ENODEV; } - vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0); - if (!vgic_maint_irq) { + vgic.maint_irq = irq_of_parse_and_map(vgic_node, 0); + if (!vgic.maint_irq) { kvm_err("error getting vgic maintenance irq from DT\n"); ret = -ENXIO; goto out; } - ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler, + ret = request_percpu_irq(vgic.maint_irq, vgic_maintenance_handler, "vgic", kvm_get_running_vcpus()); if (ret) { - kvm_err("Cannot register interrupt %d\n", vgic_maint_irq); + kvm_err("Cannot register interrupt %d\n", vgic.maint_irq); goto out; } @@ -1714,18 +1710,18 @@ int kvm_vgic_hyp_init(void) goto out_free_irq; } - vgic_vctrl_base = of_iomap(vgic_node, 2); - if (!vgic_vctrl_base) { + vgic.vctrl_base = of_iomap(vgic_node, 2); + if (!vgic.vctrl_base) { kvm_err("Cannot ioremap VCTRL\n"); ret = -ENOMEM; goto out_free_irq; } - vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR); - vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1; + vgic.nr_lr = readl_relaxed(vgic.vctrl_base + GICH_VTR); + vgic.nr_lr = (vgic.nr_lr & 0x3f) + 1; - ret = create_hyp_io_mappings(vgic_vctrl_base, - vgic_vctrl_base + resource_size(&vctrl_res), + ret = create_hyp_io_mappings(vgic.vctrl_base, + vgic.vctrl_base + resource_size(&vctrl_res), vctrl_res.start); if (ret) { kvm_err("Cannot map VCTRL into hyp\n"); @@ -1733,7 +1729,7 @@ int kvm_vgic_hyp_init(void) } kvm_info("%s@%llx IRQ%d\n", vgic_node->name, - vctrl_res.start, vgic_maint_irq); + vctrl_res.start, vgic.maint_irq); on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { @@ -1741,14 +1737,14 @@ int kvm_vgic_hyp_init(void) ret = -ENXIO; goto out_unmap; } - vgic_vcpu_base = vcpu_res.start; + vgic.vcpu_base = vcpu_res.start; goto out; out_unmap: - iounmap(vgic_vctrl_base); + iounmap(vgic.vctrl_base); out_free_irq: - free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus()); + free_percpu_irq(vgic.maint_irq, kvm_get_running_vcpus()); out: of_node_put(vgic_node); return ret; @@ -1783,7 +1779,7 @@ int kvm_vgic_init(struct kvm *kvm) } ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, - vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE); + vgic.vcpu_base, KVM_VGIC_V2_CPU_SIZE); if (ret) { kvm_err("Unable to remap VGIC CPU to VCPU\n"); goto out; @@ -1829,7 +1825,7 @@ int kvm_vgic_create(struct kvm *kvm) } spin_lock_init(&kvm->arch.vgic.lock); - kvm->arch.vgic.vctrl_base = vgic_vctrl_base; + kvm->arch.vgic.vctrl_base = vgic.vctrl_base; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; -- cgit v0.10.2 From 8f186d522c69bb18dd9b93a634da4953228c67d4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Feb 2014 18:13:03 +0000 Subject: KVM: ARM: vgic: split 
GICv2 backend from the main vgic code Brutally hack the innocent vgic code, and move the GICv2 specific code to its own file, using vgic_ops and vgic_params as a way to pass information between the two blocks. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 789bca9..f7057ed 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -21,4 +21,5 @@ obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o +obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 72a9fd5..7e92952 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -19,5 +19,5 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o $(KVM)/arm/vgic-v2.o kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index ce2e142..d8d52a9 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -32,7 +32,8 @@ #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) #define VGIC_NR_SHARED_IRQS (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS) #define VGIC_MAX_CPUS KVM_MAX_VCPUS -#define VGIC_MAX_LRS (1 << 6) + +#define VGIC_V2_MAX_LRS (1 << 6) /* Sanity checks... */ #if (VGIC_MAX_CPUS > 8) @@ -162,7 +163,7 @@ struct vgic_v2_cpu_if { u32 vgic_eisr[2]; /* Saved only */ u32 vgic_elrsr[2]; /* Saved only */ u32 vgic_apr; - u32 vgic_lr[VGIC_MAX_LRS]; + u32 vgic_lr[VGIC_V2_MAX_LRS]; }; struct vgic_cpu { @@ -175,7 +176,7 @@ struct vgic_cpu { DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS); /* Bitmap of used/free list registers */ - DECLARE_BITMAP( lr_used, VGIC_MAX_LRS); + DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS); /* Number of list registers on this CPU */ int nr_lr; @@ -214,6 +215,10 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, #define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base)) #define vgic_initialized(k) ((k)->arch.vgic.ready) +int vgic_v2_probe(struct device_node *vgic_node, + const struct vgic_ops **ops, + const struct vgic_params **params); + #else static inline int kvm_vgic_hyp_init(void) { diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c new file mode 100644 index 0000000..940418e --- /dev/null +++ b/virt/kvm/arm/vgic-v2.c @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2012,2013 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr) +{ + struct vgic_lr lr_desc; + u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr]; + + lr_desc.irq = val & GICH_LR_VIRTUALID; + if (lr_desc.irq <= 15) + lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; + else + lr_desc.source = 0; + lr_desc.state = 0; + + if (val & GICH_LR_PENDING_BIT) + lr_desc.state |= LR_STATE_PENDING; + if (val & GICH_LR_ACTIVE_BIT) + lr_desc.state |= LR_STATE_ACTIVE; + if (val & GICH_LR_EOI) + lr_desc.state |= LR_EOI_INT; + + return lr_desc; +} + +static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq; + + if (lr_desc.state & LR_STATE_PENDING) + lr_val |= GICH_LR_PENDING_BIT; + if (lr_desc.state & LR_STATE_ACTIVE) + lr_val |= GICH_LR_ACTIVE_BIT; + if (lr_desc.state & LR_EOI_INT) + lr_val |= GICH_LR_EOI; + + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; +} + +static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + if (!(lr_desc.state & LR_STATE_MASK)) + set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr); +} + +static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) +{ + u64 val; + +#if BITS_PER_LONG == 64 + val = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1]; + val <<= 32; + val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0]; +#else + val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr; +#endif + return val; +} + +static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) +{ + u64 val; + +#if BITS_PER_LONG == 64 + val = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1]; + val <<= 32; + val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0]; +#else + val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr; +#endif + return val; +} + +static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) +{ + u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr; + u32 ret = 0; + + if (misr & GICH_MISR_EOI) + ret |= INT_STATUS_EOI; + if (misr & GICH_MISR_U) + ret |= INT_STATUS_UNDERFLOW; + + return ret; +} + +static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE; +} + +static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; +} + +static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr; + + vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT; + vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT; + vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT; + vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT; +} + +static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr; + + vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK; + vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK; + vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK; + vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; + + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; +} + +static void vgic_v2_enable(struct kvm_vcpu *vcpu) +{ + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. 
Anything else resets to zero + * anyway. + */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; + + /* Get the show on the road... */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; +} + +static const struct vgic_ops vgic_v2_ops = { + .get_lr = vgic_v2_get_lr, + .set_lr = vgic_v2_set_lr, + .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, + .get_elrsr = vgic_v2_get_elrsr, + .get_eisr = vgic_v2_get_eisr, + .get_interrupt_status = vgic_v2_get_interrupt_status, + .enable_underflow = vgic_v2_enable_underflow, + .disable_underflow = vgic_v2_disable_underflow, + .get_vmcr = vgic_v2_get_vmcr, + .set_vmcr = vgic_v2_set_vmcr, + .enable = vgic_v2_enable, +}; + +static struct vgic_params vgic_v2_params; + +/** + * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT + * @node: pointer to the DT node + * @ops: address of a pointer to the GICv2 operations + * @params: address of a pointer to HW-specific parameters + * + * Returns 0 if a GICv2 has been found, with the low level operations + * in *ops and the HW parameters in *params. Returns an error code + * otherwise. + */ +int vgic_v2_probe(struct device_node *vgic_node, + const struct vgic_ops **ops, + const struct vgic_params **params) +{ + int ret; + struct resource vctrl_res; + struct resource vcpu_res; + struct vgic_params *vgic = &vgic_v2_params; + + vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0); + if (!vgic->maint_irq) { + kvm_err("error getting vgic maintenance irq from DT\n"); + ret = -ENXIO; + goto out; + } + + ret = of_address_to_resource(vgic_node, 2, &vctrl_res); + if (ret) { + kvm_err("Cannot obtain GICH resource\n"); + goto out; + } + + vgic->vctrl_base = of_iomap(vgic_node, 2); + if (!vgic->vctrl_base) { + kvm_err("Cannot ioremap GICH\n"); + ret = -ENOMEM; + goto out; + } + + vgic->nr_lr = readl_relaxed(vgic->vctrl_base + GICH_VTR); + vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1; + + ret = create_hyp_io_mappings(vgic->vctrl_base, + vgic->vctrl_base + resource_size(&vctrl_res), + vctrl_res.start); + if (ret) { + kvm_err("Cannot map VCTRL into hyp\n"); + goto out_unmap; + } + + if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { + kvm_err("Cannot obtain GICV resource\n"); + ret = -ENXIO; + goto out_unmap; + } + vgic->vcpu_base = vcpu_res.start; + + kvm_info("%s@%llx IRQ%d\n", vgic_node->name, + vctrl_res.start, vgic->maint_irq); + + *ops = &vgic_v2_ops; + *params = vgic; + goto out; + +out_unmap: + iounmap(vgic->vctrl_base); +out: + of_node_put(vgic_node); + return ret; +} diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index f3a996d..e4b9cbb 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -95,7 +95,8 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); -static struct vgic_params vgic; +static const struct vgic_ops *vgic_ops; +static const struct vgic_params *vgic; static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) @@ -971,205 +972,61 @@ static void vgic_update_state(struct kvm *kvm) } } -static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr) -{ - struct vgic_lr lr_desc; - u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr]; - - lr_desc.irq = val & GICH_LR_VIRTUALID; - if (lr_desc.irq <= 15) - lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; - else - lr_desc.source = 0; - lr_desc.state = 0; - - if (val & GICH_LR_PENDING_BIT) - lr_desc.state |= LR_STATE_PENDING; - if (val & 
GICH_LR_ACTIVE_BIT) - lr_desc.state |= LR_STATE_ACTIVE; - if (val & GICH_LR_EOI) - lr_desc.state |= LR_EOI_INT; - - return lr_desc; -} - -static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr lr_desc) -{ - u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq; - - if (lr_desc.state & LR_STATE_PENDING) - lr_val |= GICH_LR_PENDING_BIT; - if (lr_desc.state & LR_STATE_ACTIVE) - lr_val |= GICH_LR_ACTIVE_BIT; - if (lr_desc.state & LR_EOI_INT) - lr_val |= GICH_LR_EOI; - - vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; -} - -static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr lr_desc) -{ - if (!(lr_desc.state & LR_STATE_MASK)) - set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr); -} - -static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) -{ - u64 val; - -#if BITS_PER_LONG == 64 - val = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1]; - val <<= 32; - val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0]; -#else - val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr; -#endif - return val; -} - -static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) -{ - u64 val; - -#if BITS_PER_LONG == 64 - val = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1]; - val <<= 32; - val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0]; -#else - val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr; -#endif - return val; -} - -static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) -{ - u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr; - u32 ret = 0; - - if (misr & GICH_MISR_EOI) - ret |= INT_STATUS_EOI; - if (misr & GICH_MISR_U) - ret |= INT_STATUS_UNDERFLOW; - - return ret; -} - -static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE; -} - -static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; -} - -static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr; - - vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT; - vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT; - vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT; - vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT; -} - -static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr; - - vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK; - vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK; - vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK; - vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; - - vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; -} - -static void vgic_v2_enable(struct kvm_vcpu *vcpu) -{ - /* - * By forcing VMCR to zero, the GIC will restore the binary - * points to their reset values. Anything else resets to zero - * anyway. - */ - vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; - - /* Get the show on the road... 
*/ - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; -} - -static const struct vgic_ops vgic_ops = { - .get_lr = vgic_v2_get_lr, - .set_lr = vgic_v2_set_lr, - .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, - .get_elrsr = vgic_v2_get_elrsr, - .get_eisr = vgic_v2_get_eisr, - .get_interrupt_status = vgic_v2_get_interrupt_status, - .enable_underflow = vgic_v2_enable_underflow, - .disable_underflow = vgic_v2_disable_underflow, - .get_vmcr = vgic_v2_get_vmcr, - .set_vmcr = vgic_v2_set_vmcr, - .enable = vgic_v2_enable, -}; - static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) { - return vgic_ops.get_lr(vcpu, lr); + return vgic_ops->get_lr(vcpu, lr); } static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) { - vgic_ops.set_lr(vcpu, lr, vlr); + vgic_ops->set_lr(vcpu, lr, vlr); } static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) { - vgic_ops.sync_lr_elrsr(vcpu, lr, vlr); + vgic_ops->sync_lr_elrsr(vcpu, lr, vlr); } static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) { - return vgic_ops.get_elrsr(vcpu); + return vgic_ops->get_elrsr(vcpu); } static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu) { - return vgic_ops.get_eisr(vcpu); + return vgic_ops->get_eisr(vcpu); } static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu) { - return vgic_ops.get_interrupt_status(vcpu); + return vgic_ops->get_interrupt_status(vcpu); } static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu) { - vgic_ops.enable_underflow(vcpu); + vgic_ops->enable_underflow(vcpu); } static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu) { - vgic_ops.disable_underflow(vcpu); + vgic_ops->disable_underflow(vcpu); } static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) { - vgic_ops.get_vmcr(vcpu, vmcr); + vgic_ops->get_vmcr(vcpu, vmcr); } static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) { - vgic_ops.set_vmcr(vcpu, vmcr); + vgic_ops->set_vmcr(vcpu, vmcr); } static inline void vgic_enable(struct kvm_vcpu *vcpu) { - vgic_ops.enable(vcpu); + vgic_ops->enable(vcpu); } static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) @@ -1197,7 +1054,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; int lr; - for_each_set_bit(lr, vgic_cpu->lr_used, vgic.nr_lr) { + for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { @@ -1241,8 +1098,8 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) /* Try to use another LR for this interrupt */ lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, - vgic.nr_lr); - if (lr >= vgic.nr_lr) + vgic->nr_lr); + if (lr >= vgic->nr_lr) return false; kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); @@ -1382,7 +1239,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) unsigned long *eisr_ptr = (unsigned long *)&eisr; int lr; - for_each_set_bit(lr, eisr_ptr, vgic.nr_lr) { + for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); vgic_irq_clear_active(vcpu, vlr.irq); @@ -1430,7 +1287,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) elrsr_ptr = (unsigned long *)&elrsr; /* Clear mappings for empty LRs */ - for_each_set_bit(lr, elrsr_ptr, vgic.nr_lr) { + for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) { struct vgic_lr vlr; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) @@ -1443,8 +1300,8 @@ static void 
__kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) } /* Check if we still have something up our sleeve... */ - pending = find_first_zero_bit(elrsr_ptr, vgic.nr_lr); - if (level_pending || pending < vgic.nr_lr) + pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); + if (level_pending || pending < vgic->nr_lr) set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); } @@ -1638,7 +1495,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) * all the way to the distributor structure to find out. Only * assembly code should use this one. */ - vgic_cpu->nr_lr = vgic.nr_lr; + vgic_cpu->nr_lr = vgic->nr_lr; vgic_enable(vcpu); @@ -1647,7 +1504,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) static void vgic_init_maintenance_interrupt(void *info) { - enable_percpu_irq(vgic.maint_irq, 0); + enable_percpu_irq(vgic->maint_irq, 0); } static int vgic_cpu_notify(struct notifier_block *self, @@ -1660,7 +1517,7 @@ static int vgic_cpu_notify(struct notifier_block *self, break; case CPU_DYING: case CPU_DYING_FROZEN: - disable_percpu_irq(vgic.maint_irq); + disable_percpu_irq(vgic->maint_irq); break; } @@ -1671,31 +1528,36 @@ static struct notifier_block vgic_cpu_nb = { .notifier_call = vgic_cpu_notify, }; +static const struct of_device_id vgic_ids[] = { + { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, + {}, +}; + int kvm_vgic_hyp_init(void) { - int ret; - struct resource vctrl_res; - struct resource vcpu_res; + const struct of_device_id *matched_id; + int (*vgic_probe)(struct device_node *,const struct vgic_ops **, + const struct vgic_params **); struct device_node *vgic_node; + int ret; - vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic"); + vgic_node = of_find_matching_node_and_match(NULL, + vgic_ids, &matched_id); if (!vgic_node) { - kvm_err("error: no compatible vgic node in DT\n"); + kvm_err("error: no compatible GIC node found\n"); return -ENODEV; } - vgic.maint_irq = irq_of_parse_and_map(vgic_node, 0); - if (!vgic.maint_irq) { - kvm_err("error getting vgic maintenance irq from DT\n"); - ret = -ENXIO; - goto out; - } + vgic_probe = matched_id->data; + ret = vgic_probe(vgic_node, &vgic_ops, &vgic); + if (ret) + return ret; - ret = request_percpu_irq(vgic.maint_irq, vgic_maintenance_handler, + ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, "vgic", kvm_get_running_vcpus()); if (ret) { - kvm_err("Cannot register interrupt %d\n", vgic.maint_irq); - goto out; + kvm_err("Cannot register interrupt %d\n", vgic->maint_irq); + return ret; } ret = __register_cpu_notifier(&vgic_cpu_nb); @@ -1704,49 +1566,12 @@ int kvm_vgic_hyp_init(void) goto out_free_irq; } - ret = of_address_to_resource(vgic_node, 2, &vctrl_res); - if (ret) { - kvm_err("Cannot obtain VCTRL resource\n"); - goto out_free_irq; - } - - vgic.vctrl_base = of_iomap(vgic_node, 2); - if (!vgic.vctrl_base) { - kvm_err("Cannot ioremap VCTRL\n"); - ret = -ENOMEM; - goto out_free_irq; - } - - vgic.nr_lr = readl_relaxed(vgic.vctrl_base + GICH_VTR); - vgic.nr_lr = (vgic.nr_lr & 0x3f) + 1; - - ret = create_hyp_io_mappings(vgic.vctrl_base, - vgic.vctrl_base + resource_size(&vctrl_res), - vctrl_res.start); - if (ret) { - kvm_err("Cannot map VCTRL into hyp\n"); - goto out_unmap; - } - - kvm_info("%s@%llx IRQ%d\n", vgic_node->name, - vctrl_res.start, vgic.maint_irq); on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); - if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { - kvm_err("Cannot obtain VCPU resource\n"); - ret = -ENXIO; - goto out_unmap; - } - vgic.vcpu_base = vcpu_res.start; - - goto out; + return 0; 
-out_unmap: - iounmap(vgic.vctrl_base); out_free_irq: - free_percpu_irq(vgic.maint_irq, kvm_get_running_vcpus()); -out: - of_node_put(vgic_node); + free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); return ret; } @@ -1779,7 +1604,7 @@ int kvm_vgic_init(struct kvm *kvm) } ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, - vgic.vcpu_base, KVM_VGIC_V2_CPU_SIZE); + vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE); if (ret) { kvm_err("Unable to remap VGIC CPU to VCPU\n"); goto out; @@ -1825,7 +1650,7 @@ int kvm_vgic_create(struct kvm *kvm) } spin_lock_init(&kvm->arch.vgic.lock); - kvm->arch.vgic.vctrl_base = vgic.vctrl_base; + kvm->arch.vgic.vctrl_base = vgic->vctrl_base; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; -- cgit v0.10.2 From f982cf4e9c37b19478c7bc6e0484a43a7e78cf57 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 15 May 2014 10:03:25 +0100 Subject: KVM: ARM: vgic: revisit implementation of irqchip_in_kernel So far, irqchip_in_kernel() was implemented by testing the value of vctrl_base, which worked fine with GICv2. With GICv3, this field is useless, as we're using system registers instead of a memory-mapped interface. To solve this, add a boolean flag indicating whether we're using a vgic or not. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index d8d52a9..f6b9fec 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -117,6 +117,7 @@ struct vgic_params { struct vgic_dist { #ifdef CONFIG_KVM_ARM_VGIC spinlock_t lock; + bool in_kernel; bool ready; /* Virtual control interface mapping */ @@ -212,7 +213,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exit_mmio *mmio); -#define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base)) +#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) ((k)->arch.vgic.ready) int vgic_v2_probe(struct device_node *vgic_node, diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index e4b9cbb..1348e74 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1650,6 +1650,7 @@ int kvm_vgic_create(struct kvm *kvm) } spin_lock_init(&kvm->arch.vgic.lock); + kvm->arch.vgic.in_kernel = true; kvm->arch.vgic.vctrl_base = vgic->vctrl_base; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; -- cgit v0.10.2 From 45451914c875bba44903ce4f1445e047b7992bf7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 26 Jun 2013 15:16:40 +0100 Subject: arm64: KVM: remove __kvm_hyp_code_{start,end} from hyp.S We already have __hyp_text_{start,end} to express the boundaries of the HYP text section, and __kvm_hyp_code_{start,end} are getting in the way of a more modular world switch code. Just turn __kvm_hyp_code_{start,end} into #defines mapping the linker-emitted symbols. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 9fcd54b..d0bfc4b 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -18,6 +18,8 @@ #ifndef __ARM_KVM_ASM_H__ #define __ARM_KVM_ASM_H__ +#include + /* * 0 is reserved as an invalid value. * Order *must* be kept in sync with the hyp switch code.
@@ -96,8 +98,8 @@ extern char __kvm_hyp_init_end[]; extern char __kvm_hyp_vector[]; -extern char __kvm_hyp_code_start[]; -extern char __kvm_hyp_code_end[]; +#define __kvm_hyp_code_start __hyp_text_start +#define __kvm_hyp_code_end __hyp_text_end extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 215ad46..7a5df52 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -50,6 +50,10 @@ static inline bool is_hyp_mode_mismatched(void) return __boot_cpu_mode[0] != __boot_cpu_mode[1]; } +/* The section containing the hypervisor text */ +extern char __hyp_text_start[]; +extern char __hyp_text_end[]; + #endif /* __ASSEMBLY__ */ #endif /* ! __ASM__VIRT_H */ diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 877d82a1..9c5d0ac 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -36,9 +36,6 @@ .pushsection .hyp.text, "ax" .align PAGE_SHIFT -__kvm_hyp_code_start: - .globl __kvm_hyp_code_start - .macro save_common_regs // x2: base address for cpu context // x3: tmp register @@ -880,7 +877,4 @@ ENTRY(__kvm_hyp_vector) ventry el1_error_invalid // Error 32-bit EL1 ENDPROC(__kvm_hyp_vector) -__kvm_hyp_code_end: - .globl __kvm_hyp_code_end - .popsection -- cgit v0.10.2 From 1a9b13056dde7e3092304d6041ccc60a913042ea Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jun 2013 11:57:56 +0100 Subject: arm64: KVM: split GICv2 world switch from hyp code Move the GICv2 world switch code into its own file, and add the necessary indirection to the arm64 switch code. Also introduce a new type field to the vgic_params structure. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index dc4e3ed..6dfb404 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -225,6 +225,11 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext) return 0; } +static inline void vgic_arch_setup(const struct vgic_params *vgic) +{ + BUG_ON(vgic->type != VGIC_V2); +} + int kvm_perf_init(void); int kvm_perf_teardown(void); diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index d0bfc4b..6252264 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -105,6 +105,10 @@ extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); + +extern char __save_vgic_v2_state[]; +extern char __restore_vgic_v2_state[]; + #endif #endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 92242ce..4c182d0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -200,4 +200,25 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, hyp_stack_ptr, vector_ptr); } +struct vgic_sr_vectors { + void *save_vgic; + void *restore_vgic; +}; + +static inline void vgic_arch_setup(const struct vgic_params *vgic) +{ + extern struct vgic_sr_vectors __vgic_sr_vectors; + + switch(vgic->type) + { + case VGIC_V2: + __vgic_sr_vectors.save_vgic = __save_vgic_v2_state; + __vgic_sr_vectors.restore_vgic = __restore_vgic_v2_state; + break; + + default: + BUG(); + } +} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kernel/asm-offsets.c 
b/arch/arm64/kernel/asm-offsets.c index 20fd488..dafc415 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -129,6 +129,9 @@ int main(void) DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); + DEFINE(VGIC_SAVE_FN, offsetof(struct vgic_sr_vectors, save_vgic)); + DEFINE(VGIC_RESTORE_FN, offsetof(struct vgic_sr_vectors, restore_vgic)); + DEFINE(VGIC_SR_VECTOR_SZ, sizeof(struct vgic_sr_vectors)); DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 7e92952..daf24dc 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -19,5 +19,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o $(KVM)/arm/vgic-v2.o +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o +kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 9c5d0ac..56df9a3 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -16,7 +16,6 @@ */ #include -#include #include #include @@ -376,100 +375,23 @@ .endm /* - * Save the VGIC CPU state into memory - * x0: Register pointing to VCPU struct - * Do not corrupt x1!!! + * Call into the vgic backend for state saving */ .macro save_vgic_state - /* Get VGIC VCTRL base into x2 */ - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr x2, [x2, #KVM_VGIC_VCTRL] - kern_hyp_va x2 - cbz x2, 2f // disabled - - /* Compute the address of struct vgic_cpu */ - add x3, x0, #VCPU_VGIC_CPU - - /* Save all interesting registers */ - ldr w4, [x2, #GICH_HCR] - ldr w5, [x2, #GICH_VMCR] - ldr w6, [x2, #GICH_MISR] - ldr w7, [x2, #GICH_EISR0] - ldr w8, [x2, #GICH_EISR1] - ldr w9, [x2, #GICH_ELRSR0] - ldr w10, [x2, #GICH_ELRSR1] - ldr w11, [x2, #GICH_APR] -CPU_BE( rev w4, w4 ) -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) -CPU_BE( rev w7, w7 ) -CPU_BE( rev w8, w8 ) -CPU_BE( rev w9, w9 ) -CPU_BE( rev w10, w10 ) -CPU_BE( rev w11, w11 ) - - str w4, [x3, #VGIC_V2_CPU_HCR] - str w5, [x3, #VGIC_V2_CPU_VMCR] - str w6, [x3, #VGIC_V2_CPU_MISR] - str w7, [x3, #VGIC_V2_CPU_EISR] - str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] - str w9, [x3, #VGIC_V2_CPU_ELRSR] - str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] - str w11, [x3, #VGIC_V2_CPU_APR] - - /* Clear GICH_HCR */ - str wzr, [x2, #GICH_HCR] - - /* Save list registers */ - add x2, x2, #GICH_LR0 - ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_V2_CPU_LR -1: ldr w5, [x2], #4 -CPU_BE( rev w5, w5 ) - str w5, [x3], #4 - sub w4, w4, #1 - cbnz w4, 1b -2: + adr x24, __vgic_sr_vectors + ldr x24, [x24, VGIC_SAVE_FN] + kern_hyp_va x24 + blr x24 .endm /* - * Restore the VGIC CPU state from memory - * x0: Register pointing to VCPU struct + * Call into the vgic backend for state restoring */ .macro restore_vgic_state - /* Get VGIC VCTRL base into x2 */ - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr x2, [x2, #KVM_VGIC_VCTRL] - kern_hyp_va x2 - cbz x2, 2f // disabled - - /* Compute the address of struct vgic_cpu */ - add x3, x0, 
#VCPU_VGIC_CPU - - /* We only restore a minimal set of registers */ - ldr w4, [x3, #VGIC_V2_CPU_HCR] - ldr w5, [x3, #VGIC_V2_CPU_VMCR] - ldr w6, [x3, #VGIC_V2_CPU_APR] -CPU_BE( rev w4, w4 ) -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) - - str w4, [x2, #GICH_HCR] - str w5, [x2, #GICH_VMCR] - str w6, [x2, #GICH_APR] - - /* Restore list registers */ - add x2, x2, #GICH_LR0 - ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_V2_CPU_LR -1: ldr w5, [x3], #4 -CPU_BE( rev w5, w5 ) - str w5, [x2], #4 - sub w4, w4, #1 - cbnz w4, 1b -2: + adr x24, __vgic_sr_vectors + ldr x24, [x24, #VGIC_RESTORE_FN] + kern_hyp_va x24 + blr x24 .endm .macro save_timer_state @@ -650,6 +572,12 @@ ENTRY(__kvm_flush_vm_context) ret ENDPROC(__kvm_flush_vm_context) + // struct vgic_sr_vectors __vgi_sr_vectors; + .align 3 +ENTRY(__vgic_sr_vectors) + .skip VGIC_SR_VECTOR_SZ +ENDPROC(__vgic_sr_vectors) + __kvm_hyp_panic: // Guess the context by looking at VTTBR: // If zero, then we're already a host. diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S new file mode 100644 index 0000000..ae21177 --- /dev/null +++ b/arch/arm64/kvm/vgic-v2-switch.S @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + + .text + .pushsection .hyp.text, "ax" + +/* + * Save the VGIC CPU state into memory + * x0: Register pointing to VCPU struct + * Do not corrupt x1!!! 
+ */ +ENTRY(__save_vgic_v2_state) +__save_vgic_v2_state: + /* Get VGIC VCTRL base into x2 */ + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr x2, [x2, #KVM_VGIC_VCTRL] + kern_hyp_va x2 + cbz x2, 2f // disabled + + /* Compute the address of struct vgic_cpu */ + add x3, x0, #VCPU_VGIC_CPU + + /* Save all interesting registers */ + ldr w4, [x2, #GICH_HCR] + ldr w5, [x2, #GICH_VMCR] + ldr w6, [x2, #GICH_MISR] + ldr w7, [x2, #GICH_EISR0] + ldr w8, [x2, #GICH_EISR1] + ldr w9, [x2, #GICH_ELRSR0] + ldr w10, [x2, #GICH_ELRSR1] + ldr w11, [x2, #GICH_APR] +CPU_BE( rev w4, w4 ) +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) +CPU_BE( rev w7, w7 ) +CPU_BE( rev w8, w8 ) +CPU_BE( rev w9, w9 ) +CPU_BE( rev w10, w10 ) +CPU_BE( rev w11, w11 ) + + str w4, [x3, #VGIC_V2_CPU_HCR] + str w5, [x3, #VGIC_V2_CPU_VMCR] + str w6, [x3, #VGIC_V2_CPU_MISR] + str w7, [x3, #VGIC_V2_CPU_EISR] + str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] + str w9, [x3, #VGIC_V2_CPU_ELRSR] + str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] + str w11, [x3, #VGIC_V2_CPU_APR] + + /* Clear GICH_HCR */ + str wzr, [x2, #GICH_HCR] + + /* Save list registers */ + add x2, x2, #GICH_LR0 + ldr w4, [x3, #VGIC_CPU_NR_LR] + add x3, x3, #VGIC_V2_CPU_LR +1: ldr w5, [x2], #4 +CPU_BE( rev w5, w5 ) + str w5, [x3], #4 + sub w4, w4, #1 + cbnz w4, 1b +2: + ret +ENDPROC(__save_vgic_v2_state) + +/* + * Restore the VGIC CPU state from memory + * x0: Register pointing to VCPU struct + */ +ENTRY(__restore_vgic_v2_state) +__restore_vgic_v2_state: + /* Get VGIC VCTRL base into x2 */ + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr x2, [x2, #KVM_VGIC_VCTRL] + kern_hyp_va x2 + cbz x2, 2f // disabled + + /* Compute the address of struct vgic_cpu */ + add x3, x0, #VCPU_VGIC_CPU + + /* We only restore a minimal set of registers */ + ldr w4, [x3, #VGIC_V2_CPU_HCR] + ldr w5, [x3, #VGIC_V2_CPU_VMCR] + ldr w6, [x3, #VGIC_V2_CPU_APR] +CPU_BE( rev w4, w4 ) +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) + + str w4, [x2, #GICH_HCR] + str w5, [x2, #GICH_VMCR] + str w6, [x2, #GICH_APR] + + /* Restore list registers */ + add x2, x2, #GICH_LR0 + ldr w4, [x3, #VGIC_CPU_NR_LR] + add x3, x3, #VGIC_V2_CPU_LR +1: ldr w5, [x3], #4 +CPU_BE( rev w5, w5 ) + str w5, [x2], #4 + sub w4, w4, #1 + cbnz w4, 1b +2: + ret +ENDPROC(__restore_vgic_v2_state) + + .popsection diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f6b9fec..65f1121 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -24,7 +24,6 @@ #include #include #include -#include #define VGIC_NR_IRQS 256 #define VGIC_NR_SGIS 16 @@ -71,6 +70,10 @@ struct vgic_bytemap { struct kvm_vcpu; +enum vgic_type { + VGIC_V2, /* Good ol' GICv2 */ +}; + #define LR_STATE_PENDING (1 << 0) #define LR_STATE_ACTIVE (1 << 1) #define LR_STATE_MASK (3 << 0) @@ -104,6 +107,8 @@ struct vgic_ops { }; struct vgic_params { + /* vgic type */ + enum vgic_type type; /* Physical address of vgic virtual cpu interface */ phys_addr_t vcpu_base; /* Number of list registers */ diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index 940418e..d6c9c14 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -236,6 +236,7 @@ int vgic_v2_probe(struct device_node *vgic_node, kvm_info("%s@%llx IRQ%d\n", vgic_node->name, vctrl_res.start, vgic->maint_irq); + vgic->type = VGIC_V2; *ops = &vgic_v2_ops; *params = vgic; goto out; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 1348e74..7867b9a 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1568,6 +1568,9 @@ int kvm_vgic_hyp_init(void) on_each_cpu(vgic_init_maintenance_interrupt, 
NULL, 1); + /* Callback into for arch code for setup */ + vgic_arch_setup(vgic); + return 0; out_free_irq: -- cgit v0.10.2 From ac3c3747e2db2f326ffc601651de544cdd33a8e9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Aug 2013 18:19:11 +0100 Subject: arm64: KVM: move HCR_EL2.{IMO,FMO} manipulation into the vgic switch code GICv3 requires the IMO and FMO bits to be tightly coupled with some of the interrupt controller's register switch. In order to have similar code paths, move the manipulation of these bits to the GICv2 switch code. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 3d69030..cc83520 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -76,9 +76,10 @@ */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \ - HCR_AMO | HCR_IMO | HCR_FMO | \ - HCR_SWIO | HCR_TIDCP | HCR_RW) + HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW) #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) +#define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) + /* Hyp System Control Register (SCTLR_EL2) bits */ #define SCTLR_EL2_EE (1 << 25) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 56df9a3..5945f3b 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -335,11 +335,8 @@ .endm .macro activate_traps - ldr x2, [x0, #VCPU_IRQ_LINES] - ldr x1, [x0, #VCPU_HCR_EL2] - orr x2, x2, x1 - msr hcr_el2, x2 - + ldr x2, [x0, #VCPU_HCR_EL2] + msr hcr_el2, x2 ldr x2, =(CPTR_EL2_TTA) msr cptr_el2, x2 @@ -382,12 +379,22 @@ ldr x24, [x24, VGIC_SAVE_FN] kern_hyp_va x24 blr x24 + mrs x24, hcr_el2 + mov x25, #HCR_INT_OVERRIDE + neg x25, x25 + and x24, x24, x25 + msr hcr_el2, x24 .endm /* * Call into the vgic backend for state restoring */ .macro restore_vgic_state + mrs x24, hcr_el2 + ldr x25, [x0, #VCPU_IRQ_LINES] + orr x24, x24, #HCR_INT_OVERRIDE + orr x24, x24, x25 + msr hcr_el2, x24 adr x24, __vgic_sr_vectors ldr x24, [x24, #VGIC_RESTORE_FN] kern_hyp_va x24 -- cgit v0.10.2 From b2fb1c0d378399e1427a91bb991c094f2ca09a2f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 12 Jul 2013 15:15:23 +0100 Subject: KVM: ARM: vgic: add the GICv3 backend Introduce the support code for emulating a GICv2 on top of GICv3 hardware. Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 6252264..ed4987b 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -106,6 +106,8 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); +extern u64 __vgic_v3_get_ich_vtr_el2(void); + extern char __save_vgic_v2_state[]; extern char __restore_vgic_v2_state[]; diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S new file mode 100644 index 0000000..9fbf273 --- /dev/null +++ b/arch/arm64/kvm/vgic-v3-switch.S @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + + .text + .pushsection .hyp.text, "ax" + +ENTRY(__vgic_v3_get_ich_vtr_el2) + mrs x0, ICH_VTR_EL2 + ret +ENDPROC(__vgic_v3_get_ich_vtr_el2) + + .popsection diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 65f1121..35b0c12 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -33,6 +33,7 @@ #define VGIC_MAX_CPUS KVM_MAX_VCPUS #define VGIC_V2_MAX_LRS (1 << 6) +#define VGIC_V3_MAX_LRS 16 /* Sanity checks... */ #if (VGIC_MAX_CPUS > 8) @@ -72,6 +73,7 @@ struct kvm_vcpu; enum vgic_type { VGIC_V2, /* Good ol' GICv2 */ + VGIC_V3, /* New fancy GICv3 */ }; #define LR_STATE_PENDING (1 << 0) @@ -172,6 +174,19 @@ struct vgic_v2_cpu_if { u32 vgic_lr[VGIC_V2_MAX_LRS]; }; +struct vgic_v3_cpu_if { +#ifdef CONFIG_ARM_GIC_V3 + u32 vgic_hcr; + u32 vgic_vmcr; + u32 vgic_misr; /* Saved only */ + u32 vgic_eisr; /* Saved only */ + u32 vgic_elrsr; /* Saved only */ + u32 vgic_ap0r[4]; + u32 vgic_ap1r[4]; + u64 vgic_lr[VGIC_V3_MAX_LRS]; +#endif +}; + struct vgic_cpu { #ifdef CONFIG_KVM_ARM_VGIC /* per IRQ to LR mapping */ @@ -190,6 +205,7 @@ struct vgic_cpu { /* CPU vif control registers for world switch */ union { struct vgic_v2_cpu_if vgic_v2; + struct vgic_v3_cpu_if vgic_v3; }; #endif }; @@ -224,6 +240,18 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, int vgic_v2_probe(struct device_node *vgic_node, const struct vgic_ops **ops, const struct vgic_params **params); +#ifdef CONFIG_ARM_GIC_V3 +int vgic_v3_probe(struct device_node *vgic_node, + const struct vgic_ops **ops, + const struct vgic_params **params); +#else +static inline int vgic_v3_probe(struct device_node *vgic_node, + const struct vgic_ops **ops, + const struct vgic_params **params) +{ + return -ENODEV; +} +#endif #else static inline int kvm_vgic_hyp_init(void) diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c new file mode 100644 index 0000000..f01d446 --- /dev/null +++ b/virt/kvm/arm/vgic-v3.c @@ -0,0 +1,231 @@ +/* + * Copyright (C) 2013 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +/* These are for GICv2 emulation only */ +#define GICH_LR_VIRTUALID (0x3ffUL << 0) +#define GICH_LR_PHYSID_CPUID_SHIFT (10) +#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) + +/* + * LRs are stored in reverse order in memory. make sure we index them + * correctly. 
+ */ +#define LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr) + +static u32 ich_vtr_el2; + +static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) +{ + struct vgic_lr lr_desc; + u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)]; + + lr_desc.irq = val & GICH_LR_VIRTUALID; + if (lr_desc.irq <= 15) + lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; + else + lr_desc.source = 0; + lr_desc.state = 0; + + if (val & ICH_LR_PENDING_BIT) + lr_desc.state |= LR_STATE_PENDING; + if (val & ICH_LR_ACTIVE_BIT) + lr_desc.state |= LR_STATE_ACTIVE; + if (val & ICH_LR_EOI) + lr_desc.state |= LR_EOI_INT; + + return lr_desc; +} + +static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + u64 lr_val = (((u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | + lr_desc.irq); + + if (lr_desc.state & LR_STATE_PENDING) + lr_val |= ICH_LR_PENDING_BIT; + if (lr_desc.state & LR_STATE_ACTIVE) + lr_val |= ICH_LR_ACTIVE_BIT; + if (lr_desc.state & LR_EOI_INT) + lr_val |= ICH_LR_EOI; + + vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; +} + +static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + if (!(lr_desc.state & LR_STATE_MASK)) + vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); +} + +static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr; +} + +static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr; +} + +static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu) +{ + u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr; + u32 ret = 0; + + if (misr & ICH_MISR_EOI) + ret |= INT_STATUS_EOI; + if (misr & ICH_MISR_U) + ret |= INT_STATUS_UNDERFLOW; + + return ret; +} + +static void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr; + + vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT; + vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; + vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; + vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; +} + +static void vgic_v3_enable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr |= ICH_HCR_UIE; +} + +static void vgic_v3_disable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr &= ~ICH_HCR_UIE; +} + +static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr; + + vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK; + vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; + vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; + vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; + + vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr; +} + +static void vgic_v3_enable(struct kvm_vcpu *vcpu) +{ + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. Anything else resets to zero + * anyway. + */ + vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = 0; + + /* Get the show on the road... 
*/ + vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr = ICH_HCR_EN; +} + +static const struct vgic_ops vgic_v3_ops = { + .get_lr = vgic_v3_get_lr, + .set_lr = vgic_v3_set_lr, + .sync_lr_elrsr = vgic_v3_sync_lr_elrsr, + .get_elrsr = vgic_v3_get_elrsr, + .get_eisr = vgic_v3_get_eisr, + .get_interrupt_status = vgic_v3_get_interrupt_status, + .enable_underflow = vgic_v3_enable_underflow, + .disable_underflow = vgic_v3_disable_underflow, + .get_vmcr = vgic_v3_get_vmcr, + .set_vmcr = vgic_v3_set_vmcr, + .enable = vgic_v3_enable, +}; + +static struct vgic_params vgic_v3_params; + +/** + * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT + * @node: pointer to the DT node + * @ops: address of a pointer to the GICv3 operations + * @params: address of a pointer to HW-specific parameters + * + * Returns 0 if a GICv3 has been found, with the low level operations + * in *ops and the HW parameters in *params. Returns an error code + * otherwise. + */ +int vgic_v3_probe(struct device_node *vgic_node, + const struct vgic_ops **ops, + const struct vgic_params **params) +{ + int ret = 0; + u32 gicv_idx; + struct resource vcpu_res; + struct vgic_params *vgic = &vgic_v3_params; + + vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0); + if (!vgic->maint_irq) { + kvm_err("error getting vgic maintenance irq from DT\n"); + ret = -ENXIO; + goto out; + } + + ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2); + + /* + * The ListRegs field is 5 bits, but there is a architectural + * maximum of 16 list registers. Just ignore bit 4... + */ + vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1; + + if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx)) + gicv_idx = 1; + + gicv_idx += 3; /* Also skip GICD, GICC, GICH */ + if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) { + kvm_err("Cannot obtain GICV region\n"); + ret = -ENXIO; + goto out; + } + vgic->vcpu_base = vcpu_res.start; + vgic->vctrl_base = NULL; + vgic->type = VGIC_V3; + + kvm_info("%s@%llx IRQ%d\n", vgic_node->name, + vcpu_res.start, vgic->maint_irq); + + *ops = &vgic_v3_ops; + *params = vgic; + +out: + of_node_put(vgic_node); + return ret; +} -- cgit v0.10.2 From 754d37726010d872f1f714a8ce8920acdfa4978c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 9 Jul 2013 10:45:49 +0100 Subject: arm64: KVM: vgic: add GICv3 world switch Introduce the GICv3 world switch code used to save/restore the GICv3 context. 
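As an illustration of the LR layout shared between the C backend above and the hyp assembly below (a standalone, user-space sketch; the real LR_OFFSET() also adds the VGIC_V3_CPU_LR struct offset, dropped here so the check can run anywhere):

  #include <assert.h>
  #include <stdint.h>

  #define VGIC_V3_MAX_LRS	16
  /* C view (vgic-v3.c above): LR n is stored at index 15 - n */
  #define LR_INDEX(lr)	(VGIC_V3_MAX_LRS - 1 - (lr))
  /* asm view (vgic-v3-switch.S below), minus the struct offset:
   * byte offset of LR n from vgic_lr[0] */
  #define LR_OFFSET(n)	((15 - (n)) * 8)

  int main(void)
  {
  	uint64_t vgic_lr[VGIC_V3_MAX_LRS];

  	for (int n = 0; n < VGIC_V3_MAX_LRS; n++)
  		/* both views must name the same 8-byte slot */
  		assert((char *)&vgic_lr[LR_INDEX(n)] ==
  		       (char *)vgic_lr + LR_OFFSET(n));
  	return 0;
  }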
Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index ed4987b..a28c35b 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -110,6 +110,8 @@ extern u64 __vgic_v3_get_ich_vtr_el2(void); extern char __save_vgic_v2_state[]; extern char __restore_vgic_v2_state[]; +extern char __save_vgic_v3_state[]; +extern char __restore_vgic_v3_state[]; #endif diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index dafc415..e74654c 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -139,6 +139,14 @@ int main(void) DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); + DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr)); + DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr)); + DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr)); + DEFINE(VGIC_V3_CPU_EISR, offsetof(struct vgic_cpu, vgic_v3.vgic_eisr)); + DEFINE(VGIC_V3_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr)); + DEFINE(VGIC_V3_CPU_AP0R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r)); + DEFINE(VGIC_V3_CPU_AP1R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r)); + DEFINE(VGIC_V3_CPU_LR, offsetof(struct vgic_cpu, vgic_v3.vgic_lr)); DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S index 9fbf273..21e68f6 100644 --- a/arch/arm64/kvm/vgic-v3-switch.S +++ b/arch/arm64/kvm/vgic-v3-switch.S @@ -18,9 +18,247 @@ #include #include +#include +#include +#include +#include +#include +#include + .text .pushsection .hyp.text, "ax" +/* + * We store LRs in reverse order to let the CPU deal with streaming + * access. Use this macro to make it look saner... + */ +#define LR_OFFSET(n) (VGIC_V3_CPU_LR + (15 - n) * 8) + +/* + * Save the VGIC CPU state into memory + * x0: Register pointing to VCPU struct + * Do not corrupt x1!!! 
+ */ +.macro save_vgic_v3_state + // Compute the address of struct vgic_cpu + add x3, x0, #VCPU_VGIC_CPU + + // Make sure stores to the GIC via the memory mapped interface + // are now visible to the system register interface + dsb st + + // Save all interesting registers + mrs x4, ICH_HCR_EL2 + mrs x5, ICH_VMCR_EL2 + mrs x6, ICH_MISR_EL2 + mrs x7, ICH_EISR_EL2 + mrs x8, ICH_ELSR_EL2 + + str w4, [x3, #VGIC_V3_CPU_HCR] + str w5, [x3, #VGIC_V3_CPU_VMCR] + str w6, [x3, #VGIC_V3_CPU_MISR] + str w7, [x3, #VGIC_V3_CPU_EISR] + str w8, [x3, #VGIC_V3_CPU_ELRSR] + + msr ICH_HCR_EL2, xzr + + mrs x21, ICH_VTR_EL2 + mvn w22, w21 + ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 + + adr x24, 1f + add x24, x24, x23 + br x24 + +1: + mrs x20, ICH_LR15_EL2 + mrs x19, ICH_LR14_EL2 + mrs x18, ICH_LR13_EL2 + mrs x17, ICH_LR12_EL2 + mrs x16, ICH_LR11_EL2 + mrs x15, ICH_LR10_EL2 + mrs x14, ICH_LR9_EL2 + mrs x13, ICH_LR8_EL2 + mrs x12, ICH_LR7_EL2 + mrs x11, ICH_LR6_EL2 + mrs x10, ICH_LR5_EL2 + mrs x9, ICH_LR4_EL2 + mrs x8, ICH_LR3_EL2 + mrs x7, ICH_LR2_EL2 + mrs x6, ICH_LR1_EL2 + mrs x5, ICH_LR0_EL2 + + adr x24, 1f + add x24, x24, x23 + br x24 + +1: + str x20, [x3, #LR_OFFSET(15)] + str x19, [x3, #LR_OFFSET(14)] + str x18, [x3, #LR_OFFSET(13)] + str x17, [x3, #LR_OFFSET(12)] + str x16, [x3, #LR_OFFSET(11)] + str x15, [x3, #LR_OFFSET(10)] + str x14, [x3, #LR_OFFSET(9)] + str x13, [x3, #LR_OFFSET(8)] + str x12, [x3, #LR_OFFSET(7)] + str x11, [x3, #LR_OFFSET(6)] + str x10, [x3, #LR_OFFSET(5)] + str x9, [x3, #LR_OFFSET(4)] + str x8, [x3, #LR_OFFSET(3)] + str x7, [x3, #LR_OFFSET(2)] + str x6, [x3, #LR_OFFSET(1)] + str x5, [x3, #LR_OFFSET(0)] + + tbnz w21, #29, 6f // 6 bits + tbz w21, #30, 5f // 5 bits + // 7 bits + mrs x20, ICH_AP0R3_EL2 + str w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] + mrs x19, ICH_AP0R2_EL2 + str w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] +6: mrs x18, ICH_AP0R1_EL2 + str w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] +5: mrs x17, ICH_AP0R0_EL2 + str w17, [x3, #VGIC_V3_CPU_AP0R] + + tbnz w21, #29, 6f // 6 bits + tbz w21, #30, 5f // 5 bits + // 7 bits + mrs x20, ICH_AP1R3_EL2 + str w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] + mrs x19, ICH_AP1R2_EL2 + str w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] +6: mrs x18, ICH_AP1R1_EL2 + str w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] +5: mrs x17, ICH_AP1R0_EL2 + str w17, [x3, #VGIC_V3_CPU_AP1R] + + // Restore SRE_EL1 access and re-enable SRE at EL1. + mrs x5, ICC_SRE_EL2 + orr x5, x5, #ICC_SRE_EL2_ENABLE + msr ICC_SRE_EL2, x5 + isb + mov x5, #1 + msr ICC_SRE_EL1, x5 +.endm + +/* + * Restore the VGIC CPU state from memory + * x0: Register pointing to VCPU struct + */ +.macro restore_vgic_v3_state + // Disable SRE_EL1 access. Necessary, otherwise + // ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens... 
+ msr ICC_SRE_EL1, xzr + isb + + // Compute the address of struct vgic_cpu + add x3, x0, #VCPU_VGIC_CPU + + // Restore all interesting registers + ldr w4, [x3, #VGIC_V3_CPU_HCR] + ldr w5, [x3, #VGIC_V3_CPU_VMCR] + + msr ICH_HCR_EL2, x4 + msr ICH_VMCR_EL2, x5 + + mrs x21, ICH_VTR_EL2 + + tbnz w21, #29, 6f // 6 bits + tbz w21, #30, 5f // 5 bits + // 7 bits + ldr w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] + msr ICH_AP1R3_EL2, x20 + ldr w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] + msr ICH_AP1R2_EL2, x19 +6: ldr w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] + msr ICH_AP1R1_EL2, x18 +5: ldr w17, [x3, #VGIC_V3_CPU_AP1R] + msr ICH_AP1R0_EL2, x17 + + tbnz w21, #29, 6f // 6 bits + tbz w21, #30, 5f // 5 bits + // 7 bits + ldr w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] + msr ICH_AP0R3_EL2, x20 + ldr w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] + msr ICH_AP0R2_EL2, x19 +6: ldr w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] + msr ICH_AP0R1_EL2, x18 +5: ldr w17, [x3, #VGIC_V3_CPU_AP0R] + msr ICH_AP0R0_EL2, x17 + + and w22, w21, #0xf + mvn w22, w21 + ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 + + adr x24, 1f + add x24, x24, x23 + br x24 + +1: + ldr x20, [x3, #LR_OFFSET(15)] + ldr x19, [x3, #LR_OFFSET(14)] + ldr x18, [x3, #LR_OFFSET(13)] + ldr x17, [x3, #LR_OFFSET(12)] + ldr x16, [x3, #LR_OFFSET(11)] + ldr x15, [x3, #LR_OFFSET(10)] + ldr x14, [x3, #LR_OFFSET(9)] + ldr x13, [x3, #LR_OFFSET(8)] + ldr x12, [x3, #LR_OFFSET(7)] + ldr x11, [x3, #LR_OFFSET(6)] + ldr x10, [x3, #LR_OFFSET(5)] + ldr x9, [x3, #LR_OFFSET(4)] + ldr x8, [x3, #LR_OFFSET(3)] + ldr x7, [x3, #LR_OFFSET(2)] + ldr x6, [x3, #LR_OFFSET(1)] + ldr x5, [x3, #LR_OFFSET(0)] + + adr x24, 1f + add x24, x24, x23 + br x24 + +1: + msr ICH_LR15_EL2, x20 + msr ICH_LR14_EL2, x19 + msr ICH_LR13_EL2, x18 + msr ICH_LR12_EL2, x17 + msr ICH_LR11_EL2, x16 + msr ICH_LR10_EL2, x15 + msr ICH_LR9_EL2, x14 + msr ICH_LR8_EL2, x13 + msr ICH_LR7_EL2, x12 + msr ICH_LR6_EL2, x11 + msr ICH_LR5_EL2, x10 + msr ICH_LR4_EL2, x9 + msr ICH_LR3_EL2, x8 + msr ICH_LR2_EL2, x7 + msr ICH_LR1_EL2, x6 + msr ICH_LR0_EL2, x5 + + // Ensure that the above will have reached the + // (re)distributors. This ensure the guest will read + // the correct values from the memory-mapped interface. + isb + dsb sy + + // Prevent the guest from touching the GIC system registers + mrs x5, ICC_SRE_EL2 + and x5, x5, #~ICC_SRE_EL2_ENABLE + msr ICC_SRE_EL2, x5 +.endm + +ENTRY(__save_vgic_v3_state) + save_vgic_v3_state + ret +ENDPROC(__save_vgic_v3_state) + +ENTRY(__restore_vgic_v3_state) + restore_vgic_v3_state + ret +ENDPROC(__restore_vgic_v3_state) + ENTRY(__vgic_v3_get_ich_vtr_el2) mrs x0, ICH_VTR_EL2 ret -- cgit v0.10.2 From 67b2abfedb7b861bead93400fa315c5c30879d51 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 9 Jul 2013 10:45:49 +0100 Subject: arm64: KVM: vgic: enable GICv2 emulation on top on GICv3 hardware Add the last missing bits that enable GICv2 emulation on top of GICv3 hardware. 
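The new device-tree match entry below is the piece that selects the GICv3 backend: the generic vgic code calls whichever probe function is stored next to the matched compatible string. In essence (a simplified, self-contained sketch with stand-in names, not the kernel's exact code):

  #include <stdio.h>
  #include <string.h>

  struct vgic_id { const char *compatible; int (*probe)(void); };

  /* stand-ins for vgic_v2_probe()/vgic_v3_probe() */
  static int probe_v2(void) { printf("GICv2 backend\n"); return 0; }
  static int probe_v3(void) { printf("GICv3 backend\n"); return 0; }

  static const struct vgic_id vgic_ids[] = {
  	{ "arm,cortex-a15-gic", probe_v2 },
  	{ "arm,gic-v3",         probe_v3 },
  	{ NULL, NULL },
  };

  int main(void)
  {
  	const char *dt_compatible = "arm,gic-v3";  /* as found in the DT */

  	for (const struct vgic_id *id = vgic_ids; id->compatible; id++)
  		if (!strcmp(id->compatible, dt_compatible))
  			return id->probe();  /* vgic_v3_probe() in the kernel */
  	return 1;
  }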
Signed-off-by: Marc Zyngier diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4c182d0..4ae9213 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -216,6 +216,13 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) __vgic_sr_vectors.restore_vgic = __restore_vgic_v2_state; break; +#ifdef CONFIG_ARM_GIC_V3 + case VGIC_V3: + __vgic_sr_vectors.save_vgic = __save_vgic_v3_state; + __vgic_sr_vectors.restore_vgic = __restore_vgic_v3_state; + break; +#endif + default: BUG(); } diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index daf24dc..32a0961 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -22,4 +22,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o +kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 7867b9a..795ab48 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1530,6 +1530,7 @@ static struct notifier_block vgic_cpu_nb = { static const struct of_device_id vgic_ids[] = { { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, + { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, {}, }; -- cgit v0.10.2 From af92394efc8be73edd2301fc15f9b57fd430cd18 Mon Sep 17 00:00:00 2001 From: Li Liu Date: Tue, 1 Jul 2014 18:01:50 +0800 Subject: ARM: virt: fix wrong HSCTLR.EE bit setting HSCTLR.EE is defined as bit[25] referring to arm manual DDI0606C.b(p1590). Reviewed-by: Marc Zyngier Signed-off-by: Li Liu Signed-off-by: Marc Zyngier diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 797b1a6..7e666cf 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -134,9 +134,7 @@ ENTRY(__hyp_stub_install_secondary) mcr p15, 4, r7, c1, c1, 3 @ HSTR THUMB( orr r7, #(1 << 30) ) @ HSCTLR.TE -#ifdef CONFIG_CPU_BIG_ENDIAN - orr r7, #(1 << 9) @ HSCTLR.EE -#endif +ARM_BE8(orr r7, r7, #(1 << 25)) @ HSCTLR.EE mcr p15, 4, r7, c1, c0, 0 @ HSCTLR mrc p15, 4, r7, c1, c1, 1 @ HDCR -- cgit v0.10.2 From 64054c25cf7e060cd6780744fefe7ed3990e4f21 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:01 -0700 Subject: ARM: KVM: fix vgic V7 assembler code to work in BE image The vgic h/w registers are little endian; when BE asm code reads/writes from/to them, it needs to do byteswap after/before. Byteswap code uses ARM_BE8 wrapper to add swap only if CONFIG_CPU_BIG_ENDIAN is configured. 
Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index e4eaf30..68d99c6 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -1,4 +1,5 @@ #include +#include #define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4)) #define VCPU_USR_SP (VCPU_USR_REG(13)) @@ -420,6 +421,14 @@ vcpu .req r0 @ vcpu pointer always in r0 ldr r8, [r2, #GICH_ELRSR0] ldr r9, [r2, #GICH_ELRSR1] ldr r10, [r2, #GICH_APR] +ARM_BE8(rev r3, r3 ) +ARM_BE8(rev r4, r4 ) +ARM_BE8(rev r5, r5 ) +ARM_BE8(rev r6, r6 ) +ARM_BE8(rev r7, r7 ) +ARM_BE8(rev r8, r8 ) +ARM_BE8(rev r9, r9 ) +ARM_BE8(rev r10, r10 ) str r3, [r11, #VGIC_V2_CPU_HCR] str r4, [r11, #VGIC_V2_CPU_VMCR] @@ -439,6 +448,7 @@ vcpu .req r0 @ vcpu pointer always in r0 add r3, r11, #VGIC_V2_CPU_LR ldr r4, [r11, #VGIC_CPU_NR_LR] 1: ldr r6, [r2], #4 +ARM_BE8(rev r6, r6 ) str r6, [r3], #4 subs r4, r4, #1 bne 1b @@ -466,6 +476,9 @@ vcpu .req r0 @ vcpu pointer always in r0 ldr r3, [r11, #VGIC_V2_CPU_HCR] ldr r4, [r11, #VGIC_V2_CPU_VMCR] ldr r8, [r11, #VGIC_V2_CPU_APR] +ARM_BE8(rev r3, r3 ) +ARM_BE8(rev r4, r4 ) +ARM_BE8(rev r8, r8 ) str r3, [r2, #GICH_HCR] str r4, [r2, #GICH_VMCR] @@ -476,6 +489,7 @@ vcpu .req r0 @ vcpu pointer always in r0 add r3, r11, #VGIC_V2_CPU_LR ldr r4, [r11, #VGIC_CPU_NR_LR] 1: ldr r6, [r3], #4 +ARM_BE8(rev r6, r6 ) str r6, [r2], #4 subs r4, r4, #1 bne 1b -- cgit v0.10.2 From 19b0e60a63f758a28329aa40f4270a6c98c2dcb7 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:02 -0700 Subject: ARM: KVM: handle 64bit values passed to mrcc or from mcrr instructions in BE case In some cases the mcrr and mrrc instructions in combination with the ldrd and strd instructions need to deal with 64bit value in memory. The ldrd and strd instructions already handle endianness within word (register) boundaries but to get effect of the whole 64bit value represented correctly, rr_lo_hi macro is introduced and is used to swap registers positions when the mcrr and mrrc instructions are used. That has the effect of swapping two words. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 53b3c4a..3a67bec 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -61,6 +61,24 @@ #define ARM_EXCEPTION_FIQ 6 #define ARM_EXCEPTION_HVC 7 +/* + * The rr_lo_hi macro swaps a pair of registers depending on + * current endianness. It is used in conjunction with ldrd and strd + * instructions that load/store a 64-bit value from/to memory to/from + * a pair of registers which are used with the mrrc and mcrr instructions. + * If used with the ldrd/strd instructions, the a1 parameter is the first + * source/destination register and the a2 parameter is the second + * source/destination register. Note that the ldrd/strd instructions + * already swap the bytes within the words correctly according to the + * endianness setting, but the order of the registers need to be effectively + * swapped when used with the mrrc/mcrr instructions. 
+ */ +#ifdef CONFIG_CPU_ENDIAN_BE8 +#define rr_lo_hi(a1, a2) a2, a1 +#else +#define rr_lo_hi(a1, a2) a1, a2 +#endif + #ifndef __ASSEMBLY__ struct kvm; struct kvm_vcpu; diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S index 1b9844d..2cc14df 100644 --- a/arch/arm/kvm/init.S +++ b/arch/arm/kvm/init.S @@ -71,7 +71,7 @@ __do_hyp_init: bne phase2 @ Yes, second stage init @ Set the HTTBR to point to the hypervisor PGD pointer passed - mcrr p15, 4, r2, r3, c2 + mcrr p15, 4, rr_lo_hi(r2, r3), c2 @ Set the HTCR and VTCR to the same shareability and cacheability @ settings as the non-secure TTBCR and with T0SZ == 0. @@ -137,7 +137,7 @@ phase2: mov pc, r0 target: @ We're now in the trampoline code, switch page tables - mcrr p15, 4, r2, r3, c2 + mcrr p15, 4, rr_lo_hi(r2, r3), c2 isb @ Invalidate the old TLBs diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 0d68d40..24d4e65 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -52,7 +52,7 @@ ENTRY(__kvm_tlb_flush_vmid_ipa) dsb ishst add r0, r0, #KVM_VTTBR ldrd r2, r3, [r0] - mcrr p15, 6, r2, r3, c2 @ Write VTTBR + mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR isb mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored) dsb ish @@ -135,7 +135,7 @@ ENTRY(__kvm_vcpu_run) ldr r1, [vcpu, #VCPU_KVM] add r1, r1, #KVM_VTTBR ldrd r2, r3, [r1] - mcrr p15, 6, r2, r3, c2 @ Write VTTBR + mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR @ We're all done, just restore the GPRs and go to the guest restore_guest_regs diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 68d99c6..98c8c5b 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -520,7 +520,7 @@ ARM_BE8(rev r6, r6 ) mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL isb - mrrc p15, 3, r2, r3, c14 @ CNTV_CVAL + mrrc p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL ldr r4, =VCPU_TIMER_CNTV_CVAL add r5, vcpu, r4 strd r2, r3, [r5] @@ -560,12 +560,12 @@ ARM_BE8(rev r6, r6 ) ldr r2, [r4, #KVM_TIMER_CNTVOFF] ldr r3, [r4, #(KVM_TIMER_CNTVOFF + 4)] - mcrr p15, 4, r2, r3, c14 @ CNTVOFF + mcrr p15, 4, rr_lo_hi(r2, r3), c14 @ CNTVOFF ldr r4, =VCPU_TIMER_CNTV_CVAL add r5, vcpu, r4 ldrd r2, r3, [r5] - mcrr p15, 3, r2, r3, c14 @ CNTV_CVAL + mcrr p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL isb ldr r2, [vcpu, #VCPU_TIMER_CNTV_CTL] -- cgit v0.10.2 From 6d7311b520864531c81f0e0237e96146d8057d77 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:03 -0700 Subject: ARM: KVM: __kvm_vcpu_run function return result fix in BE case The __kvm_vcpu_run function returns a 64-bit result in two registers, which has to be adjusted for BE case. 
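The patch below implies that on a BE8 host the low word of that 64-bit result lives in r1 rather than r0. A quick host-side illustration of how a 64-bit value splits into two 32-bit words depending on endianness (a standalone sketch, not ARM-specific; mapping the two words onto r0/r1 is the assumption being illustrated, matching the new #else branch):

  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
  	uint64_t ret = 1;		/* the return code */
  	uint32_t w[2];

  	memcpy(w, &ret, sizeof(ret));	/* split as it would sit in memory */
  	printf("w[0] = 0x%08x, w[1] = 0x%08x\n",
  	       (unsigned)w[0], (unsigned)w[1]);
  	/* LE host: w[0] = 0x00000001, w[1] = 0x00000000
  	 * BE host: w[0] = 0x00000000, w[1] = 0x00000001 */
  	return 0;
  }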
Signed-off-by: Victor Kamensky Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 24d4e65..01dcb0e 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -199,8 +199,13 @@ after_vfp_restore: restore_host_regs clrex @ Clear exclusive monitor +#ifndef CONFIG_CPU_ENDIAN_BE8 mov r0, r1 @ Return the return code mov r1, #0 @ Clear upper bits in return value +#else + @ r1 already has return code + mov r0, #0 @ Clear upper bits in return value +#endif /* CONFIG_CPU_ENDIAN_BE8 */ bx lr @ return to IOCTL /******************************************************************** -- cgit v0.10.2 From 1c9f04717ca8326e8df759d5dda9cd1b3d968b5b Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:04 -0700 Subject: ARM: KVM: vgic mmio should hold data as LE bytes array in BE case According to recent clarifications of mmio.data array meaning - the mmio.data array should hold bytes as they would appear in memory. Vgic is little endian device. And in case of BE image kernel side that emulates vgic, holds data in BE form. So we need to byteswap cpu<->le32 vgic registers when we read/write them from mmio.data[]. Change has no effect in LE case because cpu already runs in le32. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 795ab48..b0edc8c 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -238,12 +238,12 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) { - return *((u32 *)mmio->data) & mask; + return le32_to_cpu(*((u32 *)mmio->data)) & mask; } static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) { - *((u32 *)mmio->data) = value & mask; + *((u32 *)mmio->data) = cpu_to_le32(value) & mask; } /** -- cgit v0.10.2 From 27f194fd360a96cc64bebb2d69dd5abd67984b8a Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:05 -0700 Subject: ARM: KVM: MMIO support BE host running LE code In case of status register E bit is not set (LE mode) and host runs in BE mode we need byteswap data, so read/write is emulated correctly. 
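Concretely, for a halfword access (a standalone sketch; only the byte-order arithmetic is shown):

  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
  	uint8_t guest_bytes[2] = { 0x34, 0x12 };	/* LE image of 0x1234 */
  	uint16_t raw;

  	memcpy(&raw, guest_bytes, sizeof(raw));
  	printf("raw = 0x%04x\n", (unsigned)raw);
  	/* 0x1234 on an LE host, 0x3412 on a BE host: the BE case is what
  	 * the le16_to_cpu()/cpu_to_le16() calls added below correct. */
  	return 0;
  }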
Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Marc Zyngier diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 0fa90c9..69b7469 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -185,9 +185,16 @@ static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, default: return be32_to_cpu(data); } + } else { + switch (len) { + case 1: + return data & 0xff; + case 2: + return le16_to_cpu(data & 0xffff); + default: + return le32_to_cpu(data); + } } - - return data; /* Leave LE untouched */ } static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, @@ -203,9 +210,16 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, default: return cpu_to_be32(data); } + } else { + switch (len) { + case 1: + return data & 0xff; + case 2: + return cpu_to_le16(data & 0xffff); + default: + return cpu_to_le32(data); + } } - - return data; /* Leave LE untouched */ } #endif /* __ARM_KVM_EMULATE_H__ */ -- cgit v0.10.2 From 73891f72c414afff6da6f01e7af2ff5a44a8b823 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:06 -0700 Subject: ARM: KVM: one_reg coproc set and get BE fixes Fix code that handles KVM_SET_ONE_REG, KVM_GET_ONE_REG ioctls to work in BE image. Before this fix get/set_one_reg functions worked correctly only in LE case - reg_from_user was taking 'void *' kernel address that actually could be target/source memory of either 4 bytes size or 8 bytes size, and code copied from/to user memory that could hold either 4 bytes register, 8 byte register or pair of 4 bytes registers. In order to work in endian agnostic way reg_from_user to reg_to_user functions should copy register value only to kernel variable with size that matches register size. In few place where size mismatch existed fix issue on macro caller side. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index c58a351..37a0fe1 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -44,6 +44,31 @@ static u32 cache_levels; /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */ #define CSSELR_MAX 12 +/* + * kvm_vcpu_arch.cp15 holds cp15 registers as an array of u32, but some + * of cp15 registers can be viewed either as couple of two u32 registers + * or one u64 register. Current u64 register encoding is that least + * significant u32 word is followed by most significant u32 word. + */ +static inline void vcpu_cp15_reg64_set(struct kvm_vcpu *vcpu, + const struct coproc_reg *r, + u64 val) +{ + vcpu->arch.cp15[r->reg] = val & 0xffffffff; + vcpu->arch.cp15[r->reg + 1] = val >> 32; +} + +static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu, + const struct coproc_reg *r) +{ + u64 val; + + val = vcpu->arch.cp15[r->reg + 1]; + val = val << 32; + val = val | vcpu->arch.cp15[r->reg]; + return val; +} + int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run) { kvm_inject_undefined(vcpu); @@ -682,17 +707,23 @@ static struct coproc_reg invariant_cp15[] = { { CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR }, }; +/* + * Reads a register value from a userspace address to a kernel + * variable. Make sure that register size matches sizeof(*__val). + */ static int reg_from_user(void *val, const void __user *uaddr, u64 id) { - /* This Just Works because we are little endian. 
*/ if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0) return -EFAULT; return 0; } +/* + * Writes a register value to a userspace address from a kernel variable. + * Make sure that register size matches sizeof(*__val). + */ static int reg_to_user(void __user *uaddr, const void *val, u64 id) { - /* This Just Works because we are little endian. */ if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0) return -EFAULT; return 0; @@ -702,6 +733,7 @@ static int get_invariant_cp15(u64 id, void __user *uaddr) { struct coproc_params params; const struct coproc_reg *r; + int ret; if (!index_to_params(id, ¶ms)) return -ENOENT; @@ -710,7 +742,15 @@ static int get_invariant_cp15(u64 id, void __user *uaddr) if (!r) return -ENOENT; - return reg_to_user(uaddr, &r->val, id); + ret = -ENOENT; + if (KVM_REG_SIZE(id) == 4) { + u32 val = r->val; + + ret = reg_to_user(uaddr, &val, id); + } else if (KVM_REG_SIZE(id) == 8) { + ret = reg_to_user(uaddr, &r->val, id); + } + return ret; } static int set_invariant_cp15(u64 id, void __user *uaddr) @@ -718,7 +758,7 @@ static int set_invariant_cp15(u64 id, void __user *uaddr) struct coproc_params params; const struct coproc_reg *r; int err; - u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */ + u64 val; if (!index_to_params(id, ¶ms)) return -ENOENT; @@ -726,7 +766,16 @@ static int set_invariant_cp15(u64 id, void __user *uaddr) if (!r) return -ENOENT; - err = reg_from_user(&val, uaddr, id); + err = -ENOENT; + if (KVM_REG_SIZE(id) == 4) { + u32 val32; + + err = reg_from_user(&val32, uaddr, id); + if (!err) + val = val32; + } else if (KVM_REG_SIZE(id) == 8) { + err = reg_from_user(&val, uaddr, id); + } if (err) return err; @@ -1004,6 +1053,7 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { const struct coproc_reg *r; void __user *uaddr = (void __user *)(long)reg->addr; + int ret; if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) return demux_c15_get(reg->id, uaddr); @@ -1015,14 +1065,24 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (!r) return get_invariant_cp15(reg->id, uaddr); - /* Note: copies two regs if size is 64 bit. 
*/ - return reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id); + ret = -ENOENT; + if (KVM_REG_SIZE(reg->id) == 8) { + u64 val; + + val = vcpu_cp15_reg64_get(vcpu, r); + ret = reg_to_user(uaddr, &val, reg->id); + } else if (KVM_REG_SIZE(reg->id) == 4) { + ret = reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id); + } + + return ret; } int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { const struct coproc_reg *r; void __user *uaddr = (void __user *)(long)reg->addr; + int ret; if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) return demux_c15_set(reg->id, uaddr); @@ -1034,8 +1094,18 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (!r) return set_invariant_cp15(reg->id, uaddr); - /* Note: copies two regs if size is 64 bit */ - return reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id); + ret = -ENOENT; + if (KVM_REG_SIZE(reg->id) == 8) { + u64 val; + + ret = reg_from_user(&val, uaddr, reg->id); + if (!ret) + vcpu_cp15_reg64_set(vcpu, r, val); + } else if (KVM_REG_SIZE(reg->id) == 4) { + ret = reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id); + } + + return ret; } static unsigned int num_demux_regs(void) -- cgit v0.10.2 From f5aa462147a209dab40b02f0f70234784b913bf9 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:07 -0700 Subject: ARM: KVM: enable KVM in Kconfig on big-endian systems Previous patches addresses ARMV7 big-endian virtualiztion, kvm related issues, so enable ARM_VIRT_EXT for big-endian now. Signed-off-by: Victor Kamensky Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 4be5bb1..466bd29 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -23,7 +23,7 @@ config KVM select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_ARM_HOST - depends on ARM_VIRT_EXT && ARM_LPAE && !CPU_BIG_ENDIAN + depends on ARM_VIRT_EXT && ARM_LPAE ---help--- Support hosting virtualized guest machines. You will also need to select one or more of the processor modules below. -- cgit v0.10.2 From b30070862edbdb252f9d0d3a1e61b8dc4c68e3d2 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:08 -0700 Subject: ARM64: KVM: MMIO support BE host running LE code In case of guest CPU running in LE mode and host runs in BE mode we need byteswap data, so read/write is emulated correctly. 
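For example, an LE guest's 64-bit store of 0x0102030405060708 leaves the bytes 08 07 06 05 04 03 02 01 in memory; read back raw by a BE host, that is 0x0807060504030201, which is why the new else branches below apply le64_to_cpu()/cpu_to_le64() (and the 32-bit equivalents) before the value reaches the emulation code.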
Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Marc Zyngier diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index dd8ecfc3..fdc3e21 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -213,6 +213,17 @@ static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, default: return be64_to_cpu(data); } + } else { + switch (len) { + case 1: + return data & 0xff; + case 2: + return le16_to_cpu(data & 0xffff); + case 4: + return le32_to_cpu(data & 0xffffffff); + default: + return le64_to_cpu(data); + } } return data; /* Leave LE untouched */ @@ -233,6 +244,17 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, default: return cpu_to_be64(data); } + } else { + switch (len) { + case 1: + return data & 0xff; + case 2: + return cpu_to_le16(data & 0xffff); + case 4: + return cpu_to_le32(data & 0xffffffff); + default: + return cpu_to_le64(data); + } } return data; /* Leave LE untouched */ -- cgit v0.10.2 From ba083d20d8cfa9e999043cd89c4ebc964ccf8927 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:09 -0700 Subject: ARM64: KVM: store kvm_vcpu_fault_info est_el2 as word esr_el2 field of struct kvm_vcpu_fault_info has u32 type. It should be stored as word. Current code works in LE case because existing puts least significant word of x1 into esr_el2, and it puts most significant work of x1 into next field, which accidentally is OK because it is updated again by next instruction. But existing code breaks in BE case. Signed-off-by: Victor Kamensky Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 5945f3b..7874e02 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -762,7 +762,7 @@ el1_trap: mrs x2, far_el2 2: mrs x0, tpidr_el2 - str x1, [x0, #VCPU_ESR_EL2] + str w1, [x0, #VCPU_ESR_EL2] str x2, [x0, #VCPU_FAR_EL2] str x3, [x0, #VCPU_HPFAR_EL2] -- cgit v0.10.2 From 9662fb4854e1319b4affda47f279c3f210316def Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:10 -0700 Subject: ARM64: KVM: fix vgic_bitmap_get_reg function for BE 64bit case Fix vgic_bitmap_get_reg function to return 'right' word address of 'unsigned long' bitmap value in case of BE 64bit image. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index b0edc8c..ede8f64 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -98,14 +98,34 @@ static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); static const struct vgic_ops *vgic_ops; static const struct vgic_params *vgic; +/* + * struct vgic_bitmap contains unions that provide two views of + * the same data. In one case it is an array of registers of + * u32's, and in the other case it is a bitmap of unsigned + * longs. + * + * This does not work on 64-bit BE systems, because the bitmap access + * will store two consecutive 32-bit words with the higher-addressed + * register's bits at the lower index and the lower-addressed register's + * bits at the higher index. + * + * Therefore, swizzle the register index when accessing the 32-bit word + * registers to access the right register's value. 
+ */ +#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 64 +#define REG_OFFSET_SWIZZLE 1 +#else +#define REG_OFFSET_SWIZZLE 0 +#endif + static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) { offset >>= 2; if (!offset) - return x->percpu[cpuid].reg; + return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE); else - return x->shared.reg + offset - 1; + return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE); } static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, -- cgit v0.10.2 From 26c99af1018c35020cfad1d20f02acb224807655 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:12 -0700 Subject: ARM64: KVM: set and get of sys registers in BE case Since size of all sys registers is always 8 bytes. Current code is actually endian agnostic. Just clean it up a bit. Removed comment about little endian. Change type of pointer from 'void *' to 'u64 *' to enforce stronger type checking. Signed-off-by: Victor Kamensky Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 34f25a5..f0cecef 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -776,17 +776,15 @@ static struct sys_reg_desc invariant_sys_regs[] = { NULL, get_ctr_el0 }, }; -static int reg_from_user(void *val, const void __user *uaddr, u64 id) +static int reg_from_user(u64 *val, const void __user *uaddr, u64 id) { - /* This Just Works because we are little endian. */ if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0) return -EFAULT; return 0; } -static int reg_to_user(void __user *uaddr, const void *val, u64 id) +static int reg_to_user(void __user *uaddr, const u64 *val, u64 id) { - /* This Just Works because we are little endian. */ if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0) return -EFAULT; return 0; -- cgit v0.10.2 From f0a3eaff71b8bd5d5acfda1f0cf3eedf49755622 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Wed, 2 Jul 2014 17:19:30 +0100 Subject: ARM64: KVM: fix big endian issue in access_vm_reg for 32bit guest Fix issue with 32bit guests running on top of BE KVM host. Indexes of high and low words of 64bit cp15 register are swapped in case of big endian code, since 64bit cp15 state is restored or saved with double word write or read instruction. Define helper macro to access low words of 64bit cp15 register. 
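The effect the helper macro compensates for can be seen with a standalone sketch (host-side C, not kernel code):

  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
  	uint32_t cp15[2];
  	uint64_t reg = 0x1122334455667788ULL;

  	memcpy(cp15, &reg, sizeof(reg));	/* like the 64-bit (strd) view of the state */
  	printf("cp15[0] = 0x%08x  cp15[1] = 0x%08x\n",
  	       (unsigned)cp15[0], (unsigned)cp15[1]);
  	/* LE: cp15[0] = 0x55667788 (low word at index 0)
  	 * BE: cp15[1] = 0x55667788 (low word at index 1) */
  	return 0;
  }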
Signed-off-by: Victor Kamensky Signed-off-by: Marc Zyngier diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4ae9213..503c706 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -140,6 +140,12 @@ struct kvm_vcpu_arch { #define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) #define vcpu_cp15(v,r) ((v)->arch.ctxt.cp15[(r)]) +#ifdef CONFIG_CPU_BIG_ENDIAN +#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 1)]) +#else +#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 0)]) +#endif + struct kvm_vm_stat { u32 remote_tlb_flush; }; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f0cecef..56288f3 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -134,13 +134,11 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, BUG_ON(!p->is_write); val = *vcpu_reg(vcpu, p->Rt); - if (!p->is_aarch32) { + if (!p->is_aarch32 || !p->is_32bit) vcpu_sys_reg(vcpu, r->reg) = val; - } else { - vcpu_cp15(vcpu, r->reg) = val & 0xffffffffUL; - if (!p->is_32bit) - vcpu_cp15(vcpu, r->reg + 1) = val >> 32; - } + else + vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL; + return true; } -- cgit v0.10.2 From 7609c1251f9d8bbcd6a05ba22153e50cf4f88cff Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Apr 2014 10:21:16 +0100 Subject: arm64: KVM: rename pm_fake handler to trap_raz_wi pm_fake doesn't quite describe what the handler does (ignoring writes and returning 0 for reads). As we're about to use it (a lot) in a different context, rename it with a (admitedly cryptic) name that make sense for all users. Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 56288f3..492ba30 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -161,18 +161,9 @@ static bool access_sctlr(struct kvm_vcpu *vcpu, return true; } -/* - * We could trap ID_DFR0 and tell the guest we don't support performance - * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was - * NAKed, so it will read the PMCR anyway. - * - * Therefore we tell the guest we have 0 counters. Unfortunately, we - * must always support PMCCNTR (the cycle counter): we just RAZ/WI for - * all PM registers, which doesn't crash the guest kernel at least. - */ -static bool pm_fake(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, - const struct sys_reg_desc *r) +static bool trap_raz_wi(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) { if (p->is_write) return ignore_write(vcpu, p); @@ -199,6 +190,17 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) /* * Architected system registers. * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 + * + * We could trap ID_DFR0 and tell the guest we don't support performance + * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was + * NAKed, so it will read the PMCR anyway. + * + * Therefore we tell the guest we have 0 counters. Unfortunately, we + * must always support PMCCNTR (the cycle counter): we just RAZ/WI for + * all PM registers, which doesn't crash the guest kernel at least. + * + * Same goes for the whole debug infrastructure, which probably breaks + * some guest functionnality. This should be fixed. 
*/ static const struct sys_reg_desc sys_reg_descs[] = { /* DC ISW */ @@ -258,10 +260,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* PMINTENSET_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001), - pm_fake }, + trap_raz_wi }, /* PMINTENCLR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010), - pm_fake }, + trap_raz_wi }, /* MAIR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000), @@ -290,43 +292,43 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* PMCR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000), - pm_fake }, + trap_raz_wi }, /* PMCNTENSET_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001), - pm_fake }, + trap_raz_wi }, /* PMCNTENCLR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010), - pm_fake }, + trap_raz_wi }, /* PMOVSCLR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011), - pm_fake }, + trap_raz_wi }, /* PMSWINC_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100), - pm_fake }, + trap_raz_wi }, /* PMSELR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101), - pm_fake }, + trap_raz_wi }, /* PMCEID0_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110), - pm_fake }, + trap_raz_wi }, /* PMCEID1_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111), - pm_fake }, + trap_raz_wi }, /* PMCCNTR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000), - pm_fake }, + trap_raz_wi }, /* PMXEVTYPER_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001), - pm_fake }, + trap_raz_wi }, /* PMXEVCNTR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010), - pm_fake }, + trap_raz_wi }, /* PMUSERENR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000), - pm_fake }, + trap_raz_wi }, /* PMOVSSET_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011), - pm_fake }, + trap_raz_wi }, /* TPIDR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010), @@ -372,19 +374,20 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw }, { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw }, - { Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake }, - { Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake }, - { Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake }, - { Op1( 0), CRn( 9), CRm(12), Op2( 3), pm_fake }, - { Op1( 0), CRn( 9), CRm(12), Op2( 5), pm_fake }, - { Op1( 0), CRn( 9), CRm(12), Op2( 6), pm_fake }, - { Op1( 0), CRn( 9), CRm(12), Op2( 7), pm_fake }, - { Op1( 0), CRn( 9), CRm(13), Op2( 0), pm_fake }, - { Op1( 0), CRn( 9), CRm(13), Op2( 1), pm_fake }, - { Op1( 0), CRn( 9), CRm(13), Op2( 2), pm_fake }, - { Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake }, - { Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake }, - { Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake }, + /* PMU */ + { Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 5), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(14), Op2( 1), 
trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi }, { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR }, { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR }, -- cgit v0.10.2 From 51ba248164d0eeb8b4f94d405430c18a56c6ac9a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 7 May 2014 12:13:14 +0100 Subject: arm64: move DBG_MDSCR_* to asm/debug-monitors.h In order to be able to use the DBG_MDSCR_* macros from the KVM code, move the relevant definitions to the obvious include file. Also move the debug_el enum to a portion of the file that is guarded by #ifndef __ASSEMBLY__ in order to use that file from assembly code. Acked-by: Will Deacon Reviewed-by: Anup Patel Signed-off-by: Marc Zyngier diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 6e9b5b3..7fb3437 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -18,6 +18,15 @@ #ifdef __KERNEL__ +/* Low-level stepping controls. */ +#define DBG_MDSCR_SS (1 << 0) +#define DBG_SPSR_SS (1 << 21) + +/* MDSCR_EL1 enabling bits */ +#define DBG_MDSCR_KDE (1 << 13) +#define DBG_MDSCR_MDE (1 << 15) +#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE) + #define DBG_ESR_EVT(x) (((x) >> 27) & 0x7) /* AArch64 */ @@ -73,11 +82,6 @@ #define CACHE_FLUSH_IS_SAFE 1 -enum debug_el { - DBG_ACTIVE_EL0 = 0, - DBG_ACTIVE_EL1, -}; - /* AArch32 */ #define DBG_ESR_EVT_BKPT 0x4 #define DBG_ESR_EVT_VECC 0x5 @@ -115,6 +119,11 @@ void unregister_break_hook(struct break_hook *hook); u8 debug_monitors_arch(void); +enum debug_el { + DBG_ACTIVE_EL0 = 0, + DBG_ACTIVE_EL1, +}; + void enable_debug_monitors(enum debug_el el); void disable_debug_monitors(enum debug_el el); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index a7fb874..e022f87 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -30,15 +30,6 @@ #include #include -/* Low-level stepping controls. */ -#define DBG_MDSCR_SS (1 << 0) -#define DBG_SPSR_SS (1 << 21) - -/* MDSCR_EL1 enabling bits */ -#define DBG_MDSCR_KDE (1 << 13) -#define DBG_MDSCR_MDE (1 << 15) -#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE) - /* Determine debug architecture. */ u8 debug_monitors_arch(void) { -- cgit v0.10.2 From 0c557ed4983b7abe152212b5b1726c2a789b2c61 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Apr 2014 10:24:46 +0100 Subject: arm64: KVM: add trap handlers for AArch64 debug registers Add handlers for all the AArch64 debug registers that are accessible from EL0 or EL1. The trapping code keeps track of the state of the debug registers, allowing for the switch code to implement a lazy switching strategy. 
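The policy, in miniature (a self-contained sketch of the strategy described in the comment added below, not the kernel's code; the KDE/MDE and dirty-bit values are the ones this patch defines):

  #include <stdio.h>

  #define DBG_MDSCR_KDE		(1 << 13)
  #define DBG_MDSCR_MDE		(1 << 15)
  #define KVM_ARM64_DEBUG_DIRTY	(1 << 0)

  struct vcpu { unsigned long debug_flags, mdscr_el1; };

  /* Switch the debug registers only when it is actually needed. */
  static int need_debug_switch(struct vcpu *v)
  {
  	if (v->debug_flags & KVM_ARM64_DEBUG_DIRTY)
  		return 1;			/* guest touched a debug reg via a trap */
  	if (v->mdscr_el1 & (DBG_MDSCR_KDE | DBG_MDSCR_MDE)) {
  		v->debug_flags |= KVM_ARM64_DEBUG_DIRTY;
  		return 1;			/* debug in active use, must be switched */
  	}
  	return 0;				/* keep trapping, skip the save/restore */
  }

  int main(void)
  {
  	struct vcpu v = { 0, 0 };

  	printf("idle guest: %d\n", need_debug_switch(&v));	/* 0 */
  	v.mdscr_el1 = DBG_MDSCR_MDE;
  	printf("debug on:   %d\n", need_debug_switch(&v));	/* 1, sets dirty */
  	return 0;
  }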
Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index a28c35b..660f75c 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -45,14 +45,25 @@ #define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */ #define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ #define PAR_EL1 21 /* Physical Address Register */ +#define MDSCR_EL1 22 /* Monitor Debug System Control Register */ +#define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */ +#define DBGBCR15_EL1 38 +#define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */ +#define DBGBVR15_EL1 54 +#define DBGWCR0_EL1 55 /* Debug Watchpoint Control Registers (0-15) */ +#define DBGWCR15_EL1 70 +#define DBGWVR0_EL1 71 /* Debug Watchpoint Value Registers (0-15) */ +#define DBGWVR15_EL1 86 +#define MDCCINT_EL1 87 /* Monitor Debug Comms Channel Interrupt Enable Reg */ + /* 32bit specific registers. Keep them at the end of the range */ -#define DACR32_EL2 22 /* Domain Access Control Register */ -#define IFSR32_EL2 23 /* Instruction Fault Status Register */ -#define FPEXC32_EL2 24 /* Floating-Point Exception Control Register */ -#define DBGVCR32_EL2 25 /* Debug Vector Catch Register */ -#define TEECR32_EL1 26 /* ThumbEE Configuration Register */ -#define TEEHBR32_EL1 27 /* ThumbEE Handler Base Register */ -#define NR_SYS_REGS 28 +#define DACR32_EL2 88 /* Domain Access Control Register */ +#define IFSR32_EL2 89 /* Instruction Fault Status Register */ +#define FPEXC32_EL2 90 /* Floating-Point Exception Control Register */ +#define DBGVCR32_EL2 91 /* Debug Vector Catch Register */ +#define TEECR32_EL1 92 /* ThumbEE Configuration Register */ +#define TEEHBR32_EL1 93 /* ThumbEE Handler Base Register */ +#define NR_SYS_REGS 94 /* 32bit mapping */ #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ @@ -89,6 +100,9 @@ #define ARM_EXCEPTION_IRQ 0 #define ARM_EXCEPTION_TRAP 1 +#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 +#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) + #ifndef __ASSEMBLY__ struct kvm; struct kvm_vcpu; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 503c706..8e410f7 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -101,6 +101,9 @@ struct kvm_vcpu_arch { /* Exception Information */ struct kvm_vcpu_fault_info fault; + /* Debug state */ + u64 debug_flags; + /* Pointer to host CPU context */ kvm_cpu_context_t *host_cpu_context; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 492ba30..d53ce43 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "sys_regs.h" @@ -171,6 +172,73 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu, return read_zero(vcpu, p); } +static bool trap_oslsr_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + return ignore_write(vcpu, p); + } else { + *vcpu_reg(vcpu, p->Rt) = (1 << 3); + return true; + } +} + +static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + return ignore_write(vcpu, p); + } else { + u32 val; + asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val)); + *vcpu_reg(vcpu, p->Rt) = val; + return true; + } +} + +/* + * We want to avoid world-switching all the DBG 
registers all the + * time: + * + * - If we've touched any debug register, it is likely that we're + * going to touch more of them. It then makes sense to disable the + * traps and start doing the save/restore dance + * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is + * then mandatory to save/restore the registers, as the guest + * depends on them. + * + * For this, we use a DIRTY bit, indicating the guest has modified the + * debug registers, used as follow: + * + * On guest entry: + * - If the dirty bit is set (because we're coming back from trapping), + * disable the traps, save host registers, restore guest registers. + * - If debug is actively in use (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), + * set the dirty bit, disable the traps, save host registers, + * restore guest registers. + * - Otherwise, enable the traps + * + * On guest exit: + * - If the dirty bit is set, save guest registers, restore host + * registers and clear the dirty bit. This ensure that the host can + * now use the debug registers. + */ +static bool trap_debug_regs(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt); + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + } else { + *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg); + } + + return true; +} + static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 amair; @@ -187,6 +255,21 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff); } +/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ +#define DBG_BCR_BVR_WCR_WVR_EL1(n) \ + /* DBGBVRn_EL1 */ \ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \ + trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 }, \ + /* DBGBCRn_EL1 */ \ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \ + trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 }, \ + /* DBGWVRn_EL1 */ \ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \ + trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 }, \ + /* DBGWCRn_EL1 */ \ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \ + trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 } + /* * Architected system registers. * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 @@ -199,8 +282,12 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) * must always support PMCCNTR (the cycle counter): we just RAZ/WI for * all PM registers, which doesn't crash the guest kernel at least. * - * Same goes for the whole debug infrastructure, which probably breaks - * some guest functionnality. This should be fixed. + * Debug handling: We do trap most, if not all debug related system + * registers. The implementation is good enough to ensure that a guest + * can use these with minimal performance degradation. The drawback is + * that we don't implement any of the external debug, none of the + * OSlock protocol. This should be revisited if we ever encounter a + * more demanding guest... 
*/ static const struct sys_reg_desc sys_reg_descs[] = { /* DC ISW */ @@ -213,12 +300,71 @@ static const struct sys_reg_desc sys_reg_descs[] = { { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010), access_dcsw }, + DBG_BCR_BVR_WCR_WVR_EL1(0), + DBG_BCR_BVR_WCR_WVR_EL1(1), + /* MDCCINT_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000), + trap_debug_regs, reset_val, MDCCINT_EL1, 0 }, + /* MDSCR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010), + trap_debug_regs, reset_val, MDSCR_EL1, 0 }, + DBG_BCR_BVR_WCR_WVR_EL1(2), + DBG_BCR_BVR_WCR_WVR_EL1(3), + DBG_BCR_BVR_WCR_WVR_EL1(4), + DBG_BCR_BVR_WCR_WVR_EL1(5), + DBG_BCR_BVR_WCR_WVR_EL1(6), + DBG_BCR_BVR_WCR_WVR_EL1(7), + DBG_BCR_BVR_WCR_WVR_EL1(8), + DBG_BCR_BVR_WCR_WVR_EL1(9), + DBG_BCR_BVR_WCR_WVR_EL1(10), + DBG_BCR_BVR_WCR_WVR_EL1(11), + DBG_BCR_BVR_WCR_WVR_EL1(12), + DBG_BCR_BVR_WCR_WVR_EL1(13), + DBG_BCR_BVR_WCR_WVR_EL1(14), + DBG_BCR_BVR_WCR_WVR_EL1(15), + + /* MDRAR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), + trap_raz_wi }, + /* OSLAR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100), + trap_raz_wi }, + /* OSLSR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100), + trap_oslsr_el1 }, + /* OSDLR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100), + trap_raz_wi }, + /* DBGPRCR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100), + trap_raz_wi }, + /* DBGCLAIMSET_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110), + trap_raz_wi }, + /* DBGCLAIMCLR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110), + trap_raz_wi }, + /* DBGAUTHSTATUS_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110), + trap_dbgauthstatus_el1 }, + /* TEECR32_EL1 */ { Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000), NULL, reset_val, TEECR32_EL1, 0 }, /* TEEHBR32_EL1 */ { Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000), NULL, reset_val, TEEHBR32_EL1, 0 }, + + /* MDCCSR_EL1 */ + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000), + trap_raz_wi }, + /* DBGDTR_EL0 */ + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000), + trap_raz_wi }, + /* DBGDTR[TR]X_EL0 */ + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000), + trap_raz_wi }, + /* DBGVCR32_EL2 */ { Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000), NULL, reset_val, DBGVCR32_EL2, 0 }, -- cgit v0.10.2 From 72564016aae45f42e488f926bc803f9a2e1c771c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Apr 2014 10:27:13 +0100 Subject: arm64: KVM: common infrastructure for handling AArch32 CP14/CP15 As we're about to trap a bunch of CP14 registers, let's rework the CP15 handling so it can be generalized and work with multiple tables. 
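As an aside for readers following the refactoring, a minimal user-space sketch of the two-stage lookup this patch introduces might look like the following. All struct and function names below are invented for illustration and are not the kernel's; the point is only the ordering: try the target-specific table, fall back to the global table, and only then report the access as unhandled.

    #include <stdio.h>
    #include <stddef.h>

    /* Invented, simplified descriptor: just enough fields to match on. */
    struct trap_desc {
        int op1, crn, crm, op2;
        const char *name;
    };

    /* Mirror of the emulate_cp() idea: 0 if this table handled the
     * access, -1 if not (including a NULL table). */
    static int emulate_cp(const struct trap_desc *table, size_t num,
                          int op1, int crn, int crm, int op2)
    {
        size_t i;

        if (!table)
            return -1;
        for (i = 0; i < num; i++) {
            if (table[i].op1 == op1 && table[i].crn == crn &&
                table[i].crm == crm && table[i].op2 == op2) {
                printf("handled by %s\n", table[i].name);
                return 0;
            }
        }
        return -1;
    }

    static const struct trap_desc target_specific[] = {
        { 0, 1, 0, 0, "target-specific SCTLR entry" },
    };

    static const struct trap_desc global_table[] = {
        { 0, 2, 0, 0, "global TTBR0 entry" },
    };

    int main(void)
    {
        /* Same ordering as the new handlers: target table first, then
         * the global one, and only then report the access. */
        if (emulate_cp(target_specific,
                       sizeof(target_specific) / sizeof(target_specific[0]),
                       0, 2, 0, 0))
            if (emulate_cp(global_table,
                           sizeof(global_table) / sizeof(global_table[0]),
                           0, 2, 0, 0))
                printf("unhandled CP access\n");
        return 0;
    }

With this shape, adding another register class (such as the CP14 tables in the follow-up patches) only means passing a different table pair to the same helpers.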
Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 660f75c..69027de 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -95,7 +95,7 @@ #define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ #define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ #define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ -#define NR_CP15_REGS (NR_SYS_REGS * 2) +#define NR_COPRO_REGS (NR_SYS_REGS * 2) #define ARM_EXCEPTION_IRQ 0 #define ARM_EXCEPTION_TRAP 1 diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h index 9a59301..0b52377 100644 --- a/arch/arm64/include/asm/kvm_coproc.h +++ b/arch/arm64/include/asm/kvm_coproc.h @@ -39,7 +39,8 @@ void kvm_register_target_sys_reg_table(unsigned int target, struct kvm_sys_reg_target_table *table); int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run); -int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8e410f7..79812be 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -86,7 +86,7 @@ struct kvm_cpu_context { struct kvm_regs gp_regs; union { u64 sys_regs[NR_SYS_REGS]; - u32 cp15[NR_CP15_REGS]; + u32 copro[NR_COPRO_REGS]; }; }; @@ -141,12 +141,17 @@ struct kvm_vcpu_arch { #define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) #define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) -#define vcpu_cp15(v,r) ((v)->arch.ctxt.cp15[(r)]) +/* + * CP14 and CP15 live in the same array, as they are backed by the + * same system registers. 
+ */ +#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)]) +#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)]) #ifdef CONFIG_CPU_BIG_ENDIAN -#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 1)]) +#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 1)]) #else -#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 0)]) +#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 0)]) #endif struct kvm_vm_stat { diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 182415e..e28be51 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -73,9 +73,9 @@ static exit_handle_fn arm_exit_handlers[] = { [ESR_EL2_EC_WFI] = kvm_handle_wfx, [ESR_EL2_EC_CP15_32] = kvm_handle_cp15_32, [ESR_EL2_EC_CP15_64] = kvm_handle_cp15_64, - [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_access, + [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_32, [ESR_EL2_EC_CP14_LS] = kvm_handle_cp14_load_store, - [ESR_EL2_EC_CP14_64] = kvm_handle_cp14_access, + [ESR_EL2_EC_CP14_64] = kvm_handle_cp14_64, [ESR_EL2_EC_HVC32] = handle_hvc, [ESR_EL2_EC_SMC32] = handle_smc, [ESR_EL2_EC_HVC64] = handle_hvc, diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d53ce43..266afd9 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -494,6 +494,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { NULL, reset_val, FPEXC32_EL2, 0x70 }, }; +/* Trapped cp14 registers */ +static const struct sys_reg_desc cp14_regs[] = { +}; + /* * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding, * depending on the way they are accessed (as a 32bit or a 64bit @@ -601,26 +605,29 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } -int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - kvm_inject_undefined(vcpu); - return 1; -} - -static void emulate_cp15(struct kvm_vcpu *vcpu, - const struct sys_reg_params *params) +/* + * emulate_cp -- tries to match a sys_reg access in a handling table, and + * call the corresponding trap handler. + * + * @params: pointer to the descriptor of the access + * @table: array of trap descriptors + * @num: size of the trap descriptor array + * + * Return 0 if the access has been handled, and -1 if not. + */ +static int emulate_cp(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params, + const struct sys_reg_desc *table, + size_t num) { - size_t num; - const struct sys_reg_desc *table, *r; + const struct sys_reg_desc *r; - table = get_target_table(vcpu->arch.target, false, &num); + if (!table) + return -1; /* Not handled */ - /* Search target-specific then generic table. */ r = find_reg(params, table, num); - if (!r) - r = find_reg(params, cp15_regs, ARRAY_SIZE(cp15_regs)); - if (likely(r)) { + if (r) { /* * Not having an accessor means that we have * configured a trap that we don't know how to @@ -632,22 +639,51 @@ static void emulate_cp15(struct kvm_vcpu *vcpu, if (likely(r->access(vcpu, params, r))) { /* Skip instruction, since it was emulated */ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); - return; } - /* If access function fails, it should complain. 
*/ + + /* Handled */ + return 0; } - kvm_err("Unsupported guest CP15 access at: %08lx\n", *vcpu_pc(vcpu)); + /* Not handled */ + return -1; +} + +static void unhandled_cp_access(struct kvm_vcpu *vcpu, + struct sys_reg_params *params) +{ + u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); + int cp; + + switch(hsr_ec) { + case ESR_EL2_EC_CP15_32: + case ESR_EL2_EC_CP15_64: + cp = 15; + break; + case ESR_EL2_EC_CP14_MR: + case ESR_EL2_EC_CP14_64: + cp = 14; + break; + default: + WARN_ON((cp = -1)); + } + + kvm_err("Unsupported guest CP%d access at: %08lx\n", + cp, *vcpu_pc(vcpu)); print_sys_reg_instr(params); kvm_inject_undefined(vcpu); } /** - * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access + * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP15 access * @vcpu: The VCPU pointer * @run: The kvm_run struct */ -int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *global, + size_t nr_global, + const struct sys_reg_desc *target_specific, + size_t nr_specific) { struct sys_reg_params params; u32 hsr = kvm_vcpu_get_hsr(vcpu); @@ -676,8 +712,14 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) *vcpu_reg(vcpu, params.Rt) = val; } - emulate_cp15(vcpu, ¶ms); + if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific)) + goto out; + if (!emulate_cp(vcpu, ¶ms, global, nr_global)) + goto out; + unhandled_cp_access(vcpu, ¶ms); + +out: /* Do the opposite hack for the read side */ if (!params.is_write) { u64 val = *vcpu_reg(vcpu, params.Rt); @@ -693,7 +735,11 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) * @vcpu: The VCPU pointer * @run: The kvm_run struct */ -int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *global, + size_t nr_global, + const struct sys_reg_desc *target_specific, + size_t nr_specific) { struct sys_reg_params params; u32 hsr = kvm_vcpu_get_hsr(vcpu); @@ -708,10 +754,51 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) params.Op1 = (hsr >> 14) & 0x7; params.Op2 = (hsr >> 17) & 0x7; - emulate_cp15(vcpu, ¶ms); + if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific)) + return 1; + if (!emulate_cp(vcpu, ¶ms, global, nr_global)) + return 1; + + unhandled_cp_access(vcpu, ¶ms); return 1; } +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + const struct sys_reg_desc *target_specific; + size_t num; + + target_specific = get_target_table(vcpu->arch.target, false, &num); + return kvm_handle_cp_64(vcpu, + cp15_regs, ARRAY_SIZE(cp15_regs), + target_specific, num); +} + +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + const struct sys_reg_desc *target_specific; + size_t num; + + target_specific = get_target_table(vcpu->arch.target, false, &num); + return kvm_handle_cp_32(vcpu, + cp15_regs, ARRAY_SIZE(cp15_regs), + target_specific, num); +} + +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + return kvm_handle_cp_64(vcpu, + cp14_regs, ARRAY_SIZE(cp14_regs), + NULL, 0); +} + +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + return kvm_handle_cp_32(vcpu, + cp14_regs, ARRAY_SIZE(cp14_regs), + NULL, 0); +} + static int emulate_sys_reg(struct kvm_vcpu *vcpu, const struct sys_reg_params *params) { -- cgit v0.10.2 From a9866ba0cddfc497335fa02a175c4578b96722ff Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Apr 2014 14:11:48 +0100 
Subject: arm64: KVM: use separate tables for AArch32 32 and 64bit traps An interesting "feature" of the CP14 encoding is that there is an overlap between 32 and 64bit registers, meaning they cannot live in the same table as we did for CP15. Create separate tables for 64bit CP14 and CP15 registers, and let the top level handler use the right one. Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 266afd9..499a351 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -498,13 +498,16 @@ static const struct sys_reg_desc sys_reg_descs[] = { static const struct sys_reg_desc cp14_regs[] = { }; +/* Trapped cp14 64bit registers */ +static const struct sys_reg_desc cp14_64_regs[] = { +}; + /* * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding, * depending on the way they are accessed (as a 32bit or a 64bit * register). */ static const struct sys_reg_desc cp15_regs[] = { - { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR }, { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, @@ -545,6 +548,10 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 }, { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, +}; + +static const struct sys_reg_desc cp15_64_regs[] = { + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, }; @@ -770,7 +777,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) target_specific = get_target_table(vcpu->arch.target, false, &num); return kvm_handle_cp_64(vcpu, - cp15_regs, ARRAY_SIZE(cp15_regs), + cp15_64_regs, ARRAY_SIZE(cp15_64_regs), target_specific, num); } @@ -788,7 +795,7 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run) { return kvm_handle_cp_64(vcpu, - cp14_regs, ARRAY_SIZE(cp14_regs), + cp14_64_regs, ARRAY_SIZE(cp14_64_regs), NULL, 0); } -- cgit v0.10.2 From e6a9551760623d1703487e8a16bb9c3ea8a7e7a8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 7 May 2014 13:43:39 +0100 Subject: arm64: KVM: check ordering of all system register tables We now have multiple tables for the various system registers we trap. Make sure we check the order of all of them, as it is critical that we get the order right (been there, done that...). Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 499a351..8ab47c7 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1308,14 +1308,32 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return write_demux_regids(uindices); } +static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n) +{ + unsigned int i; + + for (i = 1; i < n; i++) { + if (cmp_sys_reg(&table[i-1], &table[i]) >= 0) { + kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1); + return 1; + } + } + + return 0; +} + void kvm_sys_reg_table_init(void) { unsigned int i; struct sys_reg_desc clidr; /* Make sure tables are unique and in order. 
*/ - for (i = 1; i < ARRAY_SIZE(sys_reg_descs); i++) - BUG_ON(cmp_sys_reg(&sys_reg_descs[i-1], &sys_reg_descs[i]) >= 0); + BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs))); + BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs))); + BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs))); + BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs))); + BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs))); + BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs))); /* We abuse the reset function to overwrite the table itself. */ for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) -- cgit v0.10.2 From bdfb4b389c8d8f07e2d5b8e1291e01c789ba4aad Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Apr 2014 10:31:37 +0100 Subject: arm64: KVM: add trap handlers for AArch32 debug registers Add handlers for all the AArch32 debug registers that are accessible from EL0 or EL1. The code follow the same strategy as the AArch64 counterpart with regards to tracking the dirty state of the debug registers. Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 69027de..4838421 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -95,6 +95,15 @@ #define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ #define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ #define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ + +#define cp14_DBGDSCRext (MDSCR_EL1 * 2) +#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2) +#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2) +#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1) +#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) +#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) +#define cp14_DBGDCCINT (MDCCINT_EL1 * 2) + #define NR_COPRO_REGS (NR_SYS_REGS * 2) #define ARM_EXCEPTION_IRQ 0 diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 8ab47c7..a4fd526 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -494,12 +494,153 @@ static const struct sys_reg_desc sys_reg_descs[] = { NULL, reset_val, FPEXC32_EL2, 0x70 }, }; -/* Trapped cp14 registers */ +static bool trap_dbgidr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + return ignore_write(vcpu, p); + } else { + u64 dfr = read_cpuid(ID_AA64DFR0_EL1); + u64 pfr = read_cpuid(ID_AA64PFR0_EL1); + u32 el3 = !!((pfr >> 12) & 0xf); + + *vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) | + (((dfr >> 12) & 0xf) << 24) | + (((dfr >> 28) & 0xf) << 20) | + (6 << 16) | (el3 << 14) | (el3 << 12)); + return true; + } +} + +static bool trap_debug32(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + vcpu_cp14(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt); + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + } else { + *vcpu_reg(vcpu, p->Rt) = vcpu_cp14(vcpu, r->reg); + } + + return true; +} + +#define DBG_BCR_BVR_WCR_WVR(n) \ + /* DBGBVRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32, \ + NULL, (cp14_DBGBVR0 + (n) * 2) }, \ + /* DBGBCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32, \ + NULL, (cp14_DBGBCR0 + (n) * 2) }, \ + /* DBGWVRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32, \ + NULL, (cp14_DBGWVR0 + (n) * 2) }, \ + /* DBGWCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32, \ + NULL, (cp14_DBGWCR0 + (n) * 
2) } + +#define DBGBXVR(n) \ + { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32, \ + NULL, cp14_DBGBXVR0 + n * 2 } + +/* + * Trapped cp14 registers. We generally ignore most of the external + * debug, on the principle that they don't really make sense to a + * guest. Revisit this one day, whould this principle change. + */ static const struct sys_reg_desc cp14_regs[] = { + /* DBGIDR */ + { Op1( 0), CRn( 0), CRm( 0), Op2( 0), trap_dbgidr }, + /* DBGDTRRXext */ + { Op1( 0), CRn( 0), CRm( 0), Op2( 2), trap_raz_wi }, + + DBG_BCR_BVR_WCR_WVR(0), + /* DBGDSCRint */ + { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi }, + DBG_BCR_BVR_WCR_WVR(1), + /* DBGDCCINT */ + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 }, + /* DBGDSCRext */ + { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 }, + DBG_BCR_BVR_WCR_WVR(2), + /* DBGDTR[RT]Xint */ + { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi }, + /* DBGDTR[RT]Xext */ + { Op1( 0), CRn( 0), CRm( 3), Op2( 2), trap_raz_wi }, + DBG_BCR_BVR_WCR_WVR(3), + DBG_BCR_BVR_WCR_WVR(4), + DBG_BCR_BVR_WCR_WVR(5), + /* DBGWFAR */ + { Op1( 0), CRn( 0), CRm( 6), Op2( 0), trap_raz_wi }, + /* DBGOSECCR */ + { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi }, + DBG_BCR_BVR_WCR_WVR(6), + /* DBGVCR */ + { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 }, + DBG_BCR_BVR_WCR_WVR(7), + DBG_BCR_BVR_WCR_WVR(8), + DBG_BCR_BVR_WCR_WVR(9), + DBG_BCR_BVR_WCR_WVR(10), + DBG_BCR_BVR_WCR_WVR(11), + DBG_BCR_BVR_WCR_WVR(12), + DBG_BCR_BVR_WCR_WVR(13), + DBG_BCR_BVR_WCR_WVR(14), + DBG_BCR_BVR_WCR_WVR(15), + + /* DBGDRAR (32bit) */ + { Op1( 0), CRn( 1), CRm( 0), Op2( 0), trap_raz_wi }, + + DBGBXVR(0), + /* DBGOSLAR */ + { Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_raz_wi }, + DBGBXVR(1), + /* DBGOSLSR */ + { Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1 }, + DBGBXVR(2), + DBGBXVR(3), + /* DBGOSDLR */ + { Op1( 0), CRn( 1), CRm( 3), Op2( 4), trap_raz_wi }, + DBGBXVR(4), + /* DBGPRCR */ + { Op1( 0), CRn( 1), CRm( 4), Op2( 4), trap_raz_wi }, + DBGBXVR(5), + DBGBXVR(6), + DBGBXVR(7), + DBGBXVR(8), + DBGBXVR(9), + DBGBXVR(10), + DBGBXVR(11), + DBGBXVR(12), + DBGBXVR(13), + DBGBXVR(14), + DBGBXVR(15), + + /* DBGDSAR (32bit) */ + { Op1( 0), CRn( 2), CRm( 0), Op2( 0), trap_raz_wi }, + + /* DBGDEVID2 */ + { Op1( 0), CRn( 7), CRm( 0), Op2( 7), trap_raz_wi }, + /* DBGDEVID1 */ + { Op1( 0), CRn( 7), CRm( 1), Op2( 7), trap_raz_wi }, + /* DBGDEVID */ + { Op1( 0), CRn( 7), CRm( 2), Op2( 7), trap_raz_wi }, + /* DBGCLAIMSET */ + { Op1( 0), CRn( 7), CRm( 8), Op2( 6), trap_raz_wi }, + /* DBGCLAIMCLR */ + { Op1( 0), CRn( 7), CRm( 9), Op2( 6), trap_raz_wi }, + /* DBGAUTHSTATUS */ + { Op1( 0), CRn( 7), CRm(14), Op2( 6), trap_dbgauthstatus_el1 }, }; /* Trapped cp14 64bit registers */ static const struct sys_reg_desc cp14_64_regs[] = { + /* DBGDRAR (64bit) */ + { Op1( 0), CRm( 1), .access = trap_raz_wi }, + + /* DBGDSAR (64bit) */ + { Op1( 0), CRm( 2), .access = trap_raz_wi }, }; /* @@ -547,7 +688,6 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 }, { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 }, { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, - }; static const struct sys_reg_desc cp15_64_regs[] = { -- cgit v0.10.2 From b0e626b380872b663918230fafdac128c34fea56 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 7 May 2014 13:44:49 +0100 Subject: arm64: KVM: implement lazy world switch for debug registers Implement switching of the debug registers. 
While the number of registers is massive, CPUs usually don't implement them all (A57 has 6 breakpoints and 4 watchpoints, which gives us a total of 22 registers "only"). Also, we only save/restore them when MDSCR_EL1 has debug enabled, or when we've flagged the debug registers as dirty. It means that most of the time, we only save/restore MDSCR_EL1. Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index e74654c..9a9fce0 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -120,6 +120,7 @@ int main(void) DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); + DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags)); DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 7874e02..100494b 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -211,6 +212,7 @@ mrs x22, amair_el1 mrs x23, cntkctl_el1 mrs x24, par_el1 + mrs x25, mdscr_el1 stp x4, x5, [x3] stp x6, x7, [x3, #16] @@ -222,7 +224,202 @@ stp x18, x19, [x3, #112] stp x20, x21, [x3, #128] stp x22, x23, [x3, #144] - str x24, [x3, #160] + stp x24, x25, [x3, #160] +.endm + +.macro save_debug + // x2: base address for cpu context + // x3: tmp register + + mrs x26, id_aa64dfr0_el1 + ubfx x24, x26, #12, #4 // Extract BRPs + ubfx x25, x26, #20, #4 // Extract WRPs + mov w26, #15 + sub w24, w26, w24 // How many BPs to skip + sub w25, w26, w25 // How many WPs to skip + + add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + mrs x20, dbgbcr15_el1 + mrs x19, dbgbcr14_el1 + mrs x18, dbgbcr13_el1 + mrs x17, dbgbcr12_el1 + mrs x16, dbgbcr11_el1 + mrs x15, dbgbcr10_el1 + mrs x14, dbgbcr9_el1 + mrs x13, dbgbcr8_el1 + mrs x12, dbgbcr7_el1 + mrs x11, dbgbcr6_el1 + mrs x10, dbgbcr5_el1 + mrs x9, dbgbcr4_el1 + mrs x8, dbgbcr3_el1 + mrs x7, dbgbcr2_el1 + mrs x6, dbgbcr1_el1 + mrs x5, dbgbcr0_el1 + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 + +1: + str x20, [x3, #(15 * 8)] + str x19, [x3, #(14 * 8)] + str x18, [x3, #(13 * 8)] + str x17, [x3, #(12 * 8)] + str x16, [x3, #(11 * 8)] + str x15, [x3, #(10 * 8)] + str x14, [x3, #(9 * 8)] + str x13, [x3, #(8 * 8)] + str x12, [x3, #(7 * 8)] + str x11, [x3, #(6 * 8)] + str x10, [x3, #(5 * 8)] + str x9, [x3, #(4 * 8)] + str x8, [x3, #(3 * 8)] + str x7, [x3, #(2 * 8)] + str x6, [x3, #(1 * 8)] + str x5, [x3, #(0 * 8)] + + add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1) + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + mrs x20, dbgbvr15_el1 + mrs x19, dbgbvr14_el1 + mrs x18, dbgbvr13_el1 + mrs x17, dbgbvr12_el1 + mrs x16, dbgbvr11_el1 + mrs x15, dbgbvr10_el1 + mrs x14, dbgbvr9_el1 + mrs x13, dbgbvr8_el1 + mrs x12, dbgbvr7_el1 + mrs x11, dbgbvr6_el1 + mrs x10, dbgbvr5_el1 + mrs x9, dbgbvr4_el1 + mrs x8, dbgbvr3_el1 + mrs x7, dbgbvr2_el1 + mrs x6, dbgbvr1_el1 + mrs x5, dbgbvr0_el1 + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 + +1: + str x20, [x3, #(15 * 8)] + str x19, [x3, #(14 * 8)] + str x18, [x3, #(13 * 8)] + str x17, [x3, #(12 * 8)] + str x16, 
[x3, #(11 * 8)] + str x15, [x3, #(10 * 8)] + str x14, [x3, #(9 * 8)] + str x13, [x3, #(8 * 8)] + str x12, [x3, #(7 * 8)] + str x11, [x3, #(6 * 8)] + str x10, [x3, #(5 * 8)] + str x9, [x3, #(4 * 8)] + str x8, [x3, #(3 * 8)] + str x7, [x3, #(2 * 8)] + str x6, [x3, #(1 * 8)] + str x5, [x3, #(0 * 8)] + + add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1) + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + mrs x20, dbgwcr15_el1 + mrs x19, dbgwcr14_el1 + mrs x18, dbgwcr13_el1 + mrs x17, dbgwcr12_el1 + mrs x16, dbgwcr11_el1 + mrs x15, dbgwcr10_el1 + mrs x14, dbgwcr9_el1 + mrs x13, dbgwcr8_el1 + mrs x12, dbgwcr7_el1 + mrs x11, dbgwcr6_el1 + mrs x10, dbgwcr5_el1 + mrs x9, dbgwcr4_el1 + mrs x8, dbgwcr3_el1 + mrs x7, dbgwcr2_el1 + mrs x6, dbgwcr1_el1 + mrs x5, dbgwcr0_el1 + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 + +1: + str x20, [x3, #(15 * 8)] + str x19, [x3, #(14 * 8)] + str x18, [x3, #(13 * 8)] + str x17, [x3, #(12 * 8)] + str x16, [x3, #(11 * 8)] + str x15, [x3, #(10 * 8)] + str x14, [x3, #(9 * 8)] + str x13, [x3, #(8 * 8)] + str x12, [x3, #(7 * 8)] + str x11, [x3, #(6 * 8)] + str x10, [x3, #(5 * 8)] + str x9, [x3, #(4 * 8)] + str x8, [x3, #(3 * 8)] + str x7, [x3, #(2 * 8)] + str x6, [x3, #(1 * 8)] + str x5, [x3, #(0 * 8)] + + add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1) + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + mrs x20, dbgwvr15_el1 + mrs x19, dbgwvr14_el1 + mrs x18, dbgwvr13_el1 + mrs x17, dbgwvr12_el1 + mrs x16, dbgwvr11_el1 + mrs x15, dbgwvr10_el1 + mrs x14, dbgwvr9_el1 + mrs x13, dbgwvr8_el1 + mrs x12, dbgwvr7_el1 + mrs x11, dbgwvr6_el1 + mrs x10, dbgwvr5_el1 + mrs x9, dbgwvr4_el1 + mrs x8, dbgwvr3_el1 + mrs x7, dbgwvr2_el1 + mrs x6, dbgwvr1_el1 + mrs x5, dbgwvr0_el1 + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 + +1: + str x20, [x3, #(15 * 8)] + str x19, [x3, #(14 * 8)] + str x18, [x3, #(13 * 8)] + str x17, [x3, #(12 * 8)] + str x16, [x3, #(11 * 8)] + str x15, [x3, #(10 * 8)] + str x14, [x3, #(9 * 8)] + str x13, [x3, #(8 * 8)] + str x12, [x3, #(7 * 8)] + str x11, [x3, #(6 * 8)] + str x10, [x3, #(5 * 8)] + str x9, [x3, #(4 * 8)] + str x8, [x3, #(3 * 8)] + str x7, [x3, #(2 * 8)] + str x6, [x3, #(1 * 8)] + str x5, [x3, #(0 * 8)] + + mrs x21, mdccint_el1 + str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] .endm .macro restore_sysregs @@ -241,7 +438,7 @@ ldp x18, x19, [x3, #112] ldp x20, x21, [x3, #128] ldp x22, x23, [x3, #144] - ldr x24, [x3, #160] + ldp x24, x25, [x3, #160] msr vmpidr_el2, x4 msr csselr_el1, x5 @@ -264,6 +461,198 @@ msr amair_el1, x22 msr cntkctl_el1, x23 msr par_el1, x24 + msr mdscr_el1, x25 +.endm + +.macro restore_debug + // x2: base address for cpu context + // x3: tmp register + + mrs x26, id_aa64dfr0_el1 + ubfx x24, x26, #12, #4 // Extract BRPs + ubfx x25, x26, #20, #4 // Extract WRPs + mov w26, #15 + sub w24, w26, w24 // How many BPs to skip + sub w25, w26, w25 // How many WPs to skip + + add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + ldr x20, [x3, #(15 * 8)] + ldr x19, [x3, #(14 * 8)] + ldr x18, [x3, #(13 * 8)] + ldr x17, [x3, #(12 * 8)] + ldr x16, [x3, #(11 * 8)] + ldr x15, [x3, #(10 * 8)] + ldr x14, [x3, #(9 * 8)] + ldr x13, [x3, #(8 * 8)] + ldr x12, [x3, #(7 * 8)] + ldr x11, [x3, #(6 * 8)] + ldr x10, [x3, #(5 * 8)] + ldr x9, [x3, #(4 * 8)] + ldr x8, [x3, #(3 * 8)] + ldr x7, [x3, #(2 * 8)] + ldr x6, [x3, #(1 * 8)] + ldr x5, [x3, #(0 * 8)] + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + msr dbgbcr15_el1, x20 + msr dbgbcr14_el1, x19 + msr dbgbcr13_el1, x18 + msr 
dbgbcr12_el1, x17 + msr dbgbcr11_el1, x16 + msr dbgbcr10_el1, x15 + msr dbgbcr9_el1, x14 + msr dbgbcr8_el1, x13 + msr dbgbcr7_el1, x12 + msr dbgbcr6_el1, x11 + msr dbgbcr5_el1, x10 + msr dbgbcr4_el1, x9 + msr dbgbcr3_el1, x8 + msr dbgbcr2_el1, x7 + msr dbgbcr1_el1, x6 + msr dbgbcr0_el1, x5 + + add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1) + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + ldr x20, [x3, #(15 * 8)] + ldr x19, [x3, #(14 * 8)] + ldr x18, [x3, #(13 * 8)] + ldr x17, [x3, #(12 * 8)] + ldr x16, [x3, #(11 * 8)] + ldr x15, [x3, #(10 * 8)] + ldr x14, [x3, #(9 * 8)] + ldr x13, [x3, #(8 * 8)] + ldr x12, [x3, #(7 * 8)] + ldr x11, [x3, #(6 * 8)] + ldr x10, [x3, #(5 * 8)] + ldr x9, [x3, #(4 * 8)] + ldr x8, [x3, #(3 * 8)] + ldr x7, [x3, #(2 * 8)] + ldr x6, [x3, #(1 * 8)] + ldr x5, [x3, #(0 * 8)] + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + msr dbgbvr15_el1, x20 + msr dbgbvr14_el1, x19 + msr dbgbvr13_el1, x18 + msr dbgbvr12_el1, x17 + msr dbgbvr11_el1, x16 + msr dbgbvr10_el1, x15 + msr dbgbvr9_el1, x14 + msr dbgbvr8_el1, x13 + msr dbgbvr7_el1, x12 + msr dbgbvr6_el1, x11 + msr dbgbvr5_el1, x10 + msr dbgbvr4_el1, x9 + msr dbgbvr3_el1, x8 + msr dbgbvr2_el1, x7 + msr dbgbvr1_el1, x6 + msr dbgbvr0_el1, x5 + + add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1) + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + ldr x20, [x3, #(15 * 8)] + ldr x19, [x3, #(14 * 8)] + ldr x18, [x3, #(13 * 8)] + ldr x17, [x3, #(12 * 8)] + ldr x16, [x3, #(11 * 8)] + ldr x15, [x3, #(10 * 8)] + ldr x14, [x3, #(9 * 8)] + ldr x13, [x3, #(8 * 8)] + ldr x12, [x3, #(7 * 8)] + ldr x11, [x3, #(6 * 8)] + ldr x10, [x3, #(5 * 8)] + ldr x9, [x3, #(4 * 8)] + ldr x8, [x3, #(3 * 8)] + ldr x7, [x3, #(2 * 8)] + ldr x6, [x3, #(1 * 8)] + ldr x5, [x3, #(0 * 8)] + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + msr dbgwcr15_el1, x20 + msr dbgwcr14_el1, x19 + msr dbgwcr13_el1, x18 + msr dbgwcr12_el1, x17 + msr dbgwcr11_el1, x16 + msr dbgwcr10_el1, x15 + msr dbgwcr9_el1, x14 + msr dbgwcr8_el1, x13 + msr dbgwcr7_el1, x12 + msr dbgwcr6_el1, x11 + msr dbgwcr5_el1, x10 + msr dbgwcr4_el1, x9 + msr dbgwcr3_el1, x8 + msr dbgwcr2_el1, x7 + msr dbgwcr1_el1, x6 + msr dbgwcr0_el1, x5 + + add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1) + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + ldr x20, [x3, #(15 * 8)] + ldr x19, [x3, #(14 * 8)] + ldr x18, [x3, #(13 * 8)] + ldr x17, [x3, #(12 * 8)] + ldr x16, [x3, #(11 * 8)] + ldr x15, [x3, #(10 * 8)] + ldr x14, [x3, #(9 * 8)] + ldr x13, [x3, #(8 * 8)] + ldr x12, [x3, #(7 * 8)] + ldr x11, [x3, #(6 * 8)] + ldr x10, [x3, #(5 * 8)] + ldr x9, [x3, #(4 * 8)] + ldr x8, [x3, #(3 * 8)] + ldr x7, [x3, #(2 * 8)] + ldr x6, [x3, #(1 * 8)] + ldr x5, [x3, #(0 * 8)] + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + msr dbgwvr15_el1, x20 + msr dbgwvr14_el1, x19 + msr dbgwvr13_el1, x18 + msr dbgwvr12_el1, x17 + msr dbgwvr11_el1, x16 + msr dbgwvr10_el1, x15 + msr dbgwvr9_el1, x14 + msr dbgwvr8_el1, x13 + msr dbgwvr7_el1, x12 + msr dbgwvr6_el1, x11 + msr dbgwvr5_el1, x10 + msr dbgwvr4_el1, x9 + msr dbgwvr3_el1, x8 + msr dbgwvr2_el1, x7 + msr dbgwvr1_el1, x6 + msr dbgwvr0_el1, x5 + + ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] + msr mdccint_el1, x21 .endm .macro skip_32bit_state tmp, target @@ -278,6 +667,35 @@ tbz \tmp, #12, \target .endm +.macro skip_debug_state tmp, target + ldr \tmp, [x0, #VCPU_DEBUG_FLAGS] + tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target +.endm + +.macro compute_debug_state target + // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY + // is set, we 
do a full save/restore cycle and disable trapping. + add x25, x0, #VCPU_CONTEXT + + // Check the state of MDSCR_EL1 + ldr x25, [x25, #CPU_SYSREG_OFFSET(MDSCR_EL1)] + and x26, x25, #DBG_MDSCR_KDE + and x25, x25, #DBG_MDSCR_MDE + adds xzr, x25, x26 + b.eq 9998f // Nothing to see there + + // If any interesting bits was set, we must set the flag + mov x26, #KVM_ARM64_DEBUG_DIRTY + str x26, [x0, #VCPU_DEBUG_FLAGS] + b 9999f // Don't skip restore + +9998: + // Otherwise load the flags from memory in case we recently + // trapped + skip_debug_state x25, \target +9999: +.endm + .macro save_guest_32bit_state skip_32bit_state x3, 1f @@ -293,10 +711,13 @@ mrs x4, dacr32_el2 mrs x5, ifsr32_el2 mrs x6, fpexc32_el2 - mrs x7, dbgvcr32_el2 stp x4, x5, [x3] - stp x6, x7, [x3, #16] + str x6, [x3, #16] + skip_debug_state x8, 2f + mrs x7, dbgvcr32_el2 + str x7, [x3, #24] +2: skip_tee_state x8, 1f add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) @@ -319,12 +740,15 @@ add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) ldp x4, x5, [x3] - ldp x6, x7, [x3, #16] + ldr x6, [x3, #16] msr dacr32_el2, x4 msr ifsr32_el2, x5 msr fpexc32_el2, x6 - msr dbgvcr32_el2, x7 + skip_debug_state x8, 2f + ldr x7, [x3, #24] + msr dbgvcr32_el2, x7 +2: skip_tee_state x8, 1f add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) @@ -463,6 +887,14 @@ __restore_sysregs: restore_sysregs ret +__save_debug: + save_debug + ret + +__restore_debug: + restore_debug + ret + __save_fpsimd: save_fpsimd ret @@ -494,6 +926,9 @@ ENTRY(__kvm_vcpu_run) bl __save_fpsimd bl __save_sysregs + compute_debug_state 1f + bl __save_debug +1: activate_traps activate_vm @@ -505,6 +940,10 @@ ENTRY(__kvm_vcpu_run) bl __restore_sysregs bl __restore_fpsimd + + skip_debug_state x3, 1f + bl __restore_debug +1: restore_guest_32bit_state restore_guest_regs @@ -521,6 +960,10 @@ __kvm_vcpu_return: save_guest_regs bl __save_fpsimd bl __save_sysregs + + skip_debug_state x3, 1f + bl __save_debug +1: save_guest_32bit_state save_timer_state @@ -535,6 +978,14 @@ __kvm_vcpu_return: bl __restore_sysregs bl __restore_fpsimd + + skip_debug_state x3, 1f + // Clear the dirty flag for the next run, as all the state has + // already been saved. Note that we nuke the whole 64bit word. + // If we ever add more flags, we'll have to be more careful... + str xzr, [x0, #VCPU_DEBUG_FLAGS] + bl __restore_debug +1: restore_host_regs mov x0, x1 -- cgit v0.10.2 From d329de09333aeee127aaf22eb7cee9c2dc4cf475 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Apr 2014 10:32:03 +0100 Subject: arm64: KVM: enable trapping of all debug registers Enable trapping of the debug registers, preventing the guests to mess with the host state (and allowing guests to use the debug infrastructure as well). Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 100494b..b72aa9f 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -770,6 +770,14 @@ mrs x2, mdcr_el2 and x2, x2, #MDCR_EL2_HPMN_MASK orr x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR) + orr x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA) + + // Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap + // if not dirty. 
+ ldr x3, [x0, #VCPU_DEBUG_FLAGS] + tbnz x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f + orr x2, x2, #MDCR_EL2_TDA +1: msr mdcr_el2, x2 .endm -- cgit v0.10.2 From 8f42ab2749d00ea15157ab896cfbed73a247b3e1 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 5 Jun 2014 17:38:02 +0530 Subject: KVM: PPC: BOOK3S: PR: Emulate virtual timebase register virtual time base register is a per VM, per cpu register that needs to be saved and restored on vm exit and entry. Writing to VTB is not allowed in the privileged mode. Signed-off-by: Aneesh Kumar K.V [agraf: fix compile error] Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 4a58731..bd3caea 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -505,6 +505,7 @@ struct kvm_vcpu_arch { #endif /* Time base value when we entered the guest */ u64 entry_tb; + u64 entry_vtb; u32 tcr; ulong tsr; /* we need to perform set/clr_bits() which requires ulong */ u32 ivor[64]; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index bffd89d..c8f3381 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1203,6 +1203,15 @@ : "r" ((unsigned long)(v)) \ : "memory") +static inline unsigned long mfvtb (void) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return mfspr(SPRN_VTB); +#endif + return 0; +} + #ifdef __powerpc64__ #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E) #define mftb() ({unsigned long rval; \ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 1d428e60..03cbada 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -102,6 +102,15 @@ static inline u64 get_rtc(void) return (u64)hi * 1000000000 + lo; } +static inline u64 get_vtb(void) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return mfvtb(); +#endif + return 0; +} + #ifdef CONFIG_PPC64 static inline u64 get_tb(void) { diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index c254c27..ddce1ea 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -646,6 +646,9 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_BESCR: val = get_reg_val(reg->id, vcpu->arch.bescr); break; + case KVM_REG_PPC_VTB: + val = get_reg_val(reg->id, vcpu->arch.vtb); + break; default: r = -EINVAL; break; @@ -750,6 +753,9 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_BESCR: vcpu->arch.bescr = set_reg_val(reg->id, val); break; + case KVM_REG_PPC_VTB: + vcpu->arch.vtb = set_reg_val(reg->id, val); + break; default: r = -EINVAL; break; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 3565e77..1bb16a5 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -577,6 +577,9 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val */ *spr_val = vcpu->arch.spurr; break; + case SPRN_VTB: + *spr_val = vcpu->arch.vtb; + break; case SPRN_GQR0: case SPRN_GQR1: case SPRN_GQR2: diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7a12edb..315e884 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -897,9 +897,6 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_IC: *val = get_reg_val(id, vcpu->arch.ic); break; - case 
KVM_REG_PPC_VTB: - *val = get_reg_val(id, vcpu->arch.vtb); - break; case KVM_REG_PPC_CSIGR: *val = get_reg_val(id, vcpu->arch.csigr); break; @@ -1097,9 +1094,6 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_IC: vcpu->arch.ic = set_reg_val(id, *val); break; - case KVM_REG_PPC_VTB: - vcpu->arch.vtb = set_reg_val(id, *val); - break; case KVM_REG_PPC_CSIGR: vcpu->arch.csigr = set_reg_val(id, *val); break; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 671f5c92..d2deb9e 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -125,6 +125,7 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, * to find the guest purr and spurr value. */ vcpu->arch.entry_tb = get_tb(); + vcpu->arch.entry_vtb = get_vtb(); svcpu->in_use = true; } @@ -176,7 +177,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, */ vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb; vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb; - + vcpu->arch.vtb += get_vtb() - vcpu->arch.entry_vtb; svcpu->in_use = false; out: -- cgit v0.10.2 From 06da28e76b87331ebccdb6d486cfd94835b8be5e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 5 Jun 2014 17:38:05 +0530 Subject: KVM: PPC: BOOK3S: PR: Emulate instruction counter Writing to IC is not allowed in the privileged mode. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bd3caea..f9ae696 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -506,6 +506,7 @@ struct kvm_vcpu_arch { /* Time base value when we entered the guest */ u64 entry_tb; u64 entry_vtb; + u64 entry_ic; u32 tcr; ulong tsr; /* we need to perform set/clr_bits() which requires ulong */ u32 ivor[64]; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index ddce1ea..90aa5c7 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -649,6 +649,9 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_VTB: val = get_reg_val(reg->id, vcpu->arch.vtb); break; + case KVM_REG_PPC_IC: + val = get_reg_val(reg->id, vcpu->arch.ic); + break; default: r = -EINVAL; break; @@ -756,6 +759,9 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_VTB: vcpu->arch.vtb = set_reg_val(reg->id, val); break; + case KVM_REG_PPC_IC: + vcpu->arch.ic = set_reg_val(reg->id, val); + break; default: r = -EINVAL; break; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 1bb16a5..84fddcd 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -580,6 +580,9 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val case SPRN_VTB: *spr_val = vcpu->arch.vtb; break; + case SPRN_IC: + *spr_val = vcpu->arch.ic; + break; case SPRN_GQR0: case SPRN_GQR1: case SPRN_GQR2: diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 315e884..1562acf 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -894,9 +894,6 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_CIABR: *val = get_reg_val(id, vcpu->arch.ciabr); break; - case KVM_REG_PPC_IC: - *val = get_reg_val(id, vcpu->arch.ic); - break; case KVM_REG_PPC_CSIGR: *val = get_reg_val(id, vcpu->arch.csigr); break; @@ -1091,9 +1088,6 @@ static int kvmppc_set_one_reg_hv(struct 
kvm_vcpu *vcpu, u64 id, if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER) vcpu->arch.ciabr &= ~CIABR_PRIV; /* disable */ break; - case KVM_REG_PPC_IC: - vcpu->arch.ic = set_reg_val(id, *val); - break; case KVM_REG_PPC_CSIGR: vcpu->arch.csigr = set_reg_val(id, *val); break; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index d2deb9e..3da412e 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -126,6 +126,8 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, */ vcpu->arch.entry_tb = get_tb(); vcpu->arch.entry_vtb = get_vtb(); + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + vcpu->arch.entry_ic = mfspr(SPRN_IC); svcpu->in_use = true; } @@ -178,6 +180,8 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb; vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb; vcpu->arch.vtb += get_vtb() - vcpu->arch.entry_vtb; + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + vcpu->arch.ic += mfspr(SPRN_IC) - vcpu->arch.entry_ic; svcpu->in_use = false; out: -- cgit v0.10.2 From fb4188bad02f4871b26cf19b98e8d92499ca5d31 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 9 Jun 2014 01:16:32 +0200 Subject: KVM: PPC: Book3s PR: Disable AIL mode with OPAL When we're using PR KVM we must not allow the CPU to take interrupts in virtual mode, as the SLB does not contain host kernel mappings when running inside the guest context. To make sure we get good performance for non-KVM tasks but still properly functioning PR KVM, let's just disable AIL whenever a vcpu is scheduled in. This is fundamentally different from how we deal with AIL on pSeries type machines where we disable AIL for the whole machine as soon as a single KVM VM is up. The reason for that is easy - on pSeries we do not have control over per-cpu configuration of AIL. We also don't want to mess with CPU hotplug races and AIL configuration, so setting it per CPU is easier and more flexible. This patch fixes running PR KVM on POWER8 bare metal for me. Signed-off-by: Alexander Graf Acked-by: Paul Mackerras diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 3da412e..8ea7da4 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -71,6 +71,12 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) svcpu->in_use = 0; svcpu_put(svcpu); #endif + + /* Disable AIL if supported */ + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_207S)) + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL); + vcpu->cpu = smp_processor_id(); #ifdef CONFIG_PPC_BOOK3S_32 current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu; @@ -91,6 +97,12 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); + + /* Enable AIL if supported */ + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_207S)) + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3); + vcpu->cpu = -1; } -- cgit v0.10.2 From f6bf3a66227447f89f25b9db0ae39357decf2509 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 17:13:55 +0200 Subject: KVM: PPC: Book3s HV: Fix tlbie compile error Some compilers complain about uninitialized variables in the compute_tlbie_rb function. When you follow the code path you'll realize that we'll never get to that point, but the compiler isn't all that smart. 
So just default to 4k page sizes for everything, making the compiler happy and the code slightly easier to read. Signed-off-by: Alexander Graf Acked-by: Paul Mackerras diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index fddb72b..c7871f3 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -110,16 +110,12 @@ static inline int __hpte_actual_psize(unsigned int lp, int psize) static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, unsigned long pte_index) { - int b_psize, a_psize; + int b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K; unsigned int penc; unsigned long rb = 0, va_low, sllp; unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1); - if (!(v & HPTE_V_LARGE)) { - /* both base and actual psize is 4k */ - b_psize = MMU_PAGE_4K; - a_psize = MMU_PAGE_4K; - } else { + if (v & HPTE_V_LARGE) { for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) { /* valid entries have a shift value */ -- cgit v0.10.2 From 568fccc43f901889b94b228cd0238916cb40e0bd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 16 Jun 2014 16:37:38 +0200 Subject: KVM: PPC: Book3S PR: Handle hyp doorbell exits If we're running PR KVM in HV mode, we may get hypervisor doorbell interrupts. Handle those the same way we treat normal doorbells. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 8ea7da4..3b82e86 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -988,6 +988,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, case BOOK3S_INTERRUPT_DECREMENTER: case BOOK3S_INTERRUPT_HV_DECREMENTER: case BOOK3S_INTERRUPT_DOORBELL: + case BOOK3S_INTERRUPT_H_DOORBELL: vcpu->stat.dec_exits++; r = RESUME_GUEST; break; -- cgit v0.10.2 From 05a308c722822b0fbcc706b54be70f9bb9d52539 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 12 Jun 2014 18:16:10 +1000 Subject: KVM: PPC: Book3S HV: Fix ABIv2 indirect branch issue To establish addressability quickly, ABIv2 requires the target address of the function being called to be in r12. Signed-off-by: Anton Blanchard Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 868347e..da1cac5 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1913,8 +1913,8 @@ hcall_try_real_mode: lwax r3,r3,r4 cmpwi r3,0 beq guest_exit_cont - add r3,r3,r4 - mtctr r3 + add r12,r3,r4 + mtctr r12 mr r3,r9 /* get vcpu pointer */ ld r4,VCPU_GPR(R4)(r9) bctrl -- cgit v0.10.2 From ad7d4584a225e3c1ac634dc803421fe842491ecf Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 12 Jun 2014 18:16:53 +1000 Subject: KVM: PPC: Assembly functions exported to modules need _GLOBAL_TOC() Both kvmppc_hv_entry_trampoline and kvmppc_entry_trampoline are assembly functions that are exported to modules and also require a valid r2. As such we need to use _GLOBAL_TOC so we provide a global entry point that establishes the TOC (r2). 
Signed-off-by: Anton Blanchard Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index da1cac5..64ac56f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -48,7 +48,7 @@ * * LR = return address to continue at after eventually re-enabling MMU */ -_GLOBAL(kvmppc_hv_entry_trampoline) +_GLOBAL_TOC(kvmppc_hv_entry_trampoline) mflr r0 std r0, PPC_LR_STKOFF(r1) stdu r1, -112(r1) diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 9eec675..4850a22 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -146,7 +146,7 @@ kvmppc_handler_skip_ins: * On entry, r4 contains the guest shadow MSR * MSR.EE has to be 0 when calling this function */ -_GLOBAL(kvmppc_entry_trampoline) +_GLOBAL_TOC(kvmppc_entry_trampoline) mfmsr r5 LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter) toreal(r7) -- cgit v0.10.2 From da166facd432d0edde509f941304915f9bed704d Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 16 Jun 2014 14:37:53 +0200 Subject: KVM: PPC: Book3S PR: Fix ABIv2 on LE We switched to ABIv2 on Little Endian systems now which gets rid of the dotted function names. Branch to the actual functions when we see such a system. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index e2c29e3..d044b8b 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -25,7 +25,11 @@ #include #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) name +#else #define FUNC(name) GLUE(.,name) +#endif #define GET_SHADOW_VCPU(reg) addi reg, r13, PACA_SVCPU #elif defined(CONFIG_PPC_BOOK3S_32) diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 4850a22..16c4d88 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -36,7 +36,11 @@ #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) name +#else #define FUNC(name) GLUE(.,name) +#endif #elif defined(CONFIG_PPC_BOOK3S_32) -- cgit v0.10.2 From f396df35188c59a5ecb83932190505ef297754e6 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 16 Jun 2014 13:58:11 +0200 Subject: KVM: PPC: Book3S PR: Fix sparse endian checks While sending sparse with endian checks over the code base, it triggered at some places that were missing casts or had wrong types. Fix them up. 
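A user-space analogue of the rule these casts enforce is sketched below, using htobe64()/be64toh() in place of cpu_to_be64()/be64_to_cpu(). The bit value is illustrative only and is not the real HPTE layout; the point is that the big-endian representation is converted exactly once, explicitly, before any bits are tested.

    #include <stdio.h>
    #include <stdint.h>
    #include <endian.h>

    /* Illustrative bit only; the real HPTE layout lives in the kernel headers. */
    #define HPTE_V_VALID 0x1ULL

    int main(void)
    {
        /* Pretend this was copied from the guest hash table: it is stored
         * big-endian regardless of host endianness. */
        uint64_t raw_be = htobe64(HPTE_V_VALID);

        /* Convert once, explicitly, before testing bits -- the same rule
         * the sparse annotations enforce with __be64 and be64_to_cpu(). */
        uint64_t v = be64toh(raw_be);

        printf("valid bit is %s\n", (v & HPTE_V_VALID) ? "set" : "clear");
        return 0;
    }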
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index 52a63bf..f7c25c6 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -40,8 +40,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu) { long flags = kvmppc_get_gpr(vcpu, 4); long pte_index = kvmppc_get_gpr(vcpu, 5); - unsigned long pteg[2 * 8]; - unsigned long pteg_addr, i, *hpte; + __be64 pteg[2 * 8]; + __be64 *hpte; + unsigned long pteg_addr, i; long int ret; i = pte_index & 7; @@ -93,8 +94,8 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, pte_index); mutex_lock(&vcpu->kvm->arch.hpt_mutex); copy_from_user(pte, (void __user *)pteg, sizeof(pte)); - pte[0] = be64_to_cpu(pte[0]); - pte[1] = be64_to_cpu(pte[1]); + pte[0] = be64_to_cpu((__force __be64)pte[0]); + pte[1] = be64_to_cpu((__force __be64)pte[1]); ret = H_NOT_FOUND; if ((pte[0] & HPTE_V_VALID) == 0 || @@ -171,8 +172,8 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX); copy_from_user(pte, (void __user *)pteg, sizeof(pte)); - pte[0] = be64_to_cpu(pte[0]); - pte[1] = be64_to_cpu(pte[1]); + pte[0] = be64_to_cpu((__force __be64)pte[0]); + pte[1] = be64_to_cpu((__force __be64)pte[1]); /* tsl = AVPN */ flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26; @@ -211,8 +212,8 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, pte_index); mutex_lock(&vcpu->kvm->arch.hpt_mutex); copy_from_user(pte, (void __user *)pteg, sizeof(pte)); - pte[0] = be64_to_cpu(pte[0]); - pte[1] = be64_to_cpu(pte[1]); + pte[0] = be64_to_cpu((__force __be64)pte[0]); + pte[1] = be64_to_cpu((__force __be64)pte[1]); ret = H_NOT_FOUND; if ((pte[0] & HPTE_V_VALID) == 0 || @@ -231,8 +232,8 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) rb = compute_tlbie_rb(v, r, pte_index); vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); - pte[0] = cpu_to_be64(pte[0]); - pte[1] = cpu_to_be64(pte[1]); + pte[0] = (__force u64)cpu_to_be64(pte[0]); + pte[1] = (__force u64)cpu_to_be64(pte[1]); copy_to_user((void __user *)pteg, pte, sizeof(pte)); ret = H_SUCCESS; -- cgit v0.10.2 From 1f0eeb7e1a88f46afa0f435cf7c34b0c84cf2394 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 18 Jun 2014 10:15:22 +0300 Subject: KVM: PPC: e500mc: Enhance tlb invalidation condition on vcpu schedule On vcpu schedule, the condition checked for tlb pollution is too loose. The tlb entries of a vcpu become polluted (vs stale) only when a different vcpu within the same logical partition runs in-between. Optimize the tlb invalidation condition keeping last_vcpu per logical partition id. With the new invalidation condition, a guest shows 4% performance improvement on P5020DS while running a memory stress application with the cpu oversubscribed, the other guest running a cpu intensive workload. 
Guest - old invalidation condition real 3.89 user 3.87 sys 0.01 Guest - enhanced invalidation condition real 3.75 user 3.73 sys 0.01 Host real 3.70 user 1.85 sys 0.00 The memory stress application accesses 4KB pages backed by 75% of available TLB0 entries: char foo[ENTRIES][4096] __attribute__ ((aligned (4096))); int main() { char bar; int i, j; for (i = 0; i < ITERATIONS; i++) for (j = 0; j < ENTRIES; j++) bar = foo[j][0]; return 0; } Signed-off-by: Mihai Caraman Reviewed-by: Scott Wood Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 17e4562..690499d 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -110,7 +110,7 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) { } -static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu); +static DEFINE_PER_CPU(struct kvm_vcpu *[KVMPPC_NR_LPIDS], last_vcpu_of_lpid); static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) { @@ -141,9 +141,9 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_GESR, vcpu->arch.shared->esr); if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || - __get_cpu_var(last_vcpu_on_cpu) != vcpu) { + __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] != vcpu) { kvmppc_e500_tlbil_all(vcpu_e500); - __get_cpu_var(last_vcpu_on_cpu) = vcpu; + __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu; } kvmppc_load_guest_fp(vcpu); -- cgit v0.10.2 From 699a0ea0823d32030b0666b28ff8633960f7ffa7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 2 Jun 2014 11:02:59 +1000 Subject: KVM: PPC: Book3S: Controls for in-kernel sPAPR hypercall handling This provides a way for userspace controls which sPAPR hcalls get handled in the kernel. Each hcall can be individually enabled or disabled for in-kernel handling, except for H_RTAS. The exception for H_RTAS is because userspace can already control whether individual RTAS functions are handled in-kernel or not via the KVM_PPC_RTAS_DEFINE_TOKEN ioctl, and because the numeric value for H_RTAS is out of the normal sequence of hcall numbers. Hcalls are enabled or disabled using the KVM_ENABLE_CAP ioctl for the KVM_CAP_PPC_ENABLE_HCALL capability on the file descriptor for the VM. The args field of the struct kvm_enable_cap specifies the hcall number in args[0] and the enable/disable flag in args[1]; 0 means disable in-kernel handling (so that the hcall will always cause an exit to userspace) and 1 means enable. Enabling or disabling in-kernel handling of an hcall is effective across the whole VM. The ability for KVM_ENABLE_CAP to be used on a VM file descriptor on PowerPC is new, added by this commit. The KVM_CAP_ENABLE_CAP_VM capability advertises that this ability exists. When a VM is created, an initial set of hcalls are enabled for in-kernel handling. The set that is enabled is the set that have an in-kernel implementation at this point. Any new hcall implementations from this point onwards should not be added to the default set without a good reason. No distinction is made between real-mode and virtual-mode hcall implementations; the one setting controls them both. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 0fe3649..5c54d19 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2863,8 +2863,8 @@ The fields in each entry are defined as follows: this function/index combination -6. 
Capabilities that can be enabled ------------------------------------ +6. Capabilities that can be enabled on vCPUs +-------------------------------------------- There are certain capabilities that change the behavior of the virtual CPU when enabled. To enable them, please see section 4.37. Below you can find a list of @@ -3002,3 +3002,40 @@ Parameters: args[0] is the XICS device fd args[1] is the XICS CPU number (server ID) for this vcpu This capability connects the vcpu to an in-kernel XICS device. + + +7. Capabilities that can be enabled on VMs +------------------------------------------ + +There are certain capabilities that change the behavior of the virtual +machine when enabled. To enable them, please see section 4.37. Below +you can find a list of capabilities and what their effect on the VM +is when enabling them. + +The following information is provided along with the description: + + Architectures: which instruction set architectures provide this ioctl. + x86 includes both i386 and x86_64. + + Parameters: what parameters are accepted by the capability. + + Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL) + are not detailed, but errors with specific meanings are. + + +7.1 KVM_CAP_PPC_ENABLE_HCALL + +Architectures: ppc +Parameters: args[0] is the sPAPR hcall number + args[1] is 0 to disable, 1 to enable in-kernel handling + +This capability controls whether individual sPAPR hypercalls (hcalls) +get handled by the kernel or not. Enabling or disabling in-kernel +handling of an hcall is effective across the VM. On creation, an +initial set of hcalls are enabled for in-kernel handling, which +consists of those hcalls for which in-kernel handlers were implemented +before this capability was implemented. If disabled, the kernel will +not to attempt to handle the hcall, but will always exit to userspace +to handle it. Note that it may not make sense to enable some and +disable others of a group of related hcalls, but KVM does not prevent +userspace from doing that. 
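For illustration, a minimal userspace sketch of toggling in-kernel handling of one hcall through this capability; vm_fd is assumed to be the VM file descriptor returned by KVM_CREATE_VM, and the hcall number must be one of the 4-byte-aligned sPAPR opcodes below MAX_HCALL_OPCODE:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Enable (1) or disable (0) in-kernel handling of one sPAPR hcall,
 * VM-wide, via KVM_ENABLE_CAP on the VM file descriptor. */
static int set_hcall_in_kernel(int vm_fd, unsigned long hcall, int enable)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));		/* flags must be zero */
	cap.cap = KVM_CAP_PPC_ENABLE_HCALL;
	cap.args[0] = hcall;			/* hcall number, e.g. H_CEDE (0xE0) */
	cap.args[1] = enable;			/* 0: always exit to userspace, 1: handle in kernel */

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);	/* 0 on success, -1 with errno on failure */
}
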
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index a20cc0b..052ab2a 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -187,6 +187,7 @@ extern void kvmppc_hv_entry_trampoline(void); extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst); extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst); extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd); +extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm); extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, struct kvm_vcpu *vcpu); extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index f9ae696..62b2cee 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -34,6 +34,7 @@ #include #include #include +#include #define KVM_MAX_VCPUS NR_CPUS #define KVM_MAX_VCORES NR_CPUS @@ -263,6 +264,7 @@ struct kvm_arch { #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; struct list_head rtas_tokens; + DECLARE_BITMAP(enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); #endif #ifdef CONFIG_KVM_MPIC struct openpic *mpic; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f5995a9..17ffcb4 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -493,6 +493,7 @@ int main(void) DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock)); DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); + DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls)); DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor)); DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1562acf..cf445d2 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -67,6 +67,8 @@ /* Used as a "null" value for timebase values */ #define TB_NIL (~(u64)0) +static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); + static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -562,6 +564,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) struct kvm_vcpu *tvcpu; int idx, rc; + if (req <= MAX_HCALL_OPCODE && + !test_bit(req/4, vcpu->kvm->arch.enabled_hcalls)) + return RESUME_HOST; + switch (req) { case H_ENTER: idx = srcu_read_lock(&vcpu->kvm->srcu); @@ -2269,6 +2275,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) */ cpumask_setall(&kvm->arch.need_tlb_flush); + /* Start out with the default set of hcalls enabled */ + memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls, + sizeof(kvm->arch.enabled_hcalls)); + kvm->arch.rma = NULL; kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); @@ -2407,6 +2417,45 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, return r; } +/* + * List of hcall numbers to enable by default. + * For compatibility with old userspace, we enable by default + * all hcalls that were implemented before the hcall-enabling + * facility was added. Note this list should not include H_RTAS. 
+ */ +static unsigned int default_hcall_list[] = { + H_REMOVE, + H_ENTER, + H_READ, + H_PROTECT, + H_BULK_REMOVE, + H_GET_TCE, + H_PUT_TCE, + H_SET_DABR, + H_SET_XDABR, + H_CEDE, + H_PROD, + H_CONFER, + H_REGISTER_VPA, +#ifdef CONFIG_KVM_XICS + H_EOI, + H_CPPR, + H_IPI, + H_IPOLL, + H_XIRR, + H_XIRR_X, +#endif + 0 +}; + +static void init_default_hcalls(void) +{ + int i; + + for (i = 0; default_hcall_list[i]; ++i) + __set_bit(default_hcall_list[i] / 4, default_enabled_hcalls); +} + static struct kvmppc_ops kvm_ops_hv = { .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, @@ -2454,6 +2503,8 @@ static int kvmppc_book3s_init_hv(void) kvm_ops_hv.owner = THIS_MODULE; kvmppc_hv_ops = &kvm_ops_hv; + init_default_hcalls(); + r = kvmppc_mmu_hv_init(); return r; } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 64ac56f..33aaade 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1909,6 +1909,17 @@ hcall_try_real_mode: clrrdi r3,r3,2 cmpldi r3,hcall_real_table_end - hcall_real_table bge guest_exit_cont + /* See if this hcall is enabled for in-kernel handling */ + ld r4, VCPU_KVM(r9) + srdi r0, r3, 8 /* r0 = (r3 / 4) >> 6 */ + sldi r0, r0, 3 /* index into kvm->arch.enabled_hcalls[] */ + add r4, r4, r0 + ld r0, KVM_ENABLED_HCALLS(r4) + rlwinm r4, r3, 32-2, 0x3f /* r4 = (r3 / 4) & 0x3f */ + srd r0, r0, r4 + andi. r0, r0, 1 + beq guest_exit_cont + /* Get pointer to handler, if any, and call it */ LOAD_REG_ADDR(r4, hcall_real_table) lwax r3,r3,r4 cmpwi r3,0 diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 3b82e86..123ac7d 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1597,6 +1597,11 @@ static int kvmppc_core_init_vm_pr(struct kvm *kvm) { mutex_init(&kvm->arch.hpt_mutex); +#ifdef CONFIG_PPC_BOOK3S_64 + /* Start out with the default set of hcalls enabled */ + kvmppc_pr_init_default_hcalls(kvm); +#endif + if (firmware_has_feature(FW_FEATURE_SET_MODE)) { spin_lock(&kvm_global_user_count_lock); if (++kvm_global_user_count == 1) diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index f7c25c6..eacaa6e 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -267,6 +267,10 @@ static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) { + if (cmd <= MAX_HCALL_OPCODE && + !test_bit(cmd/4, vcpu->kvm->arch.enabled_hcalls)) + return EMULATE_FAIL; + switch (cmd) { case H_ENTER: return kvmppc_h_pr_enter(vcpu); @@ -304,3 +308,36 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) return EMULATE_FAIL; } + + +/* + * List of hcall numbers to enable by default. + * For compatibility with old userspace, we enable by default + * all hcalls that were implemented before the hcall-enabling + * facility was added. Note this list should not include H_RTAS. 
+ */ +static unsigned int default_hcall_list[] = { + H_ENTER, + H_REMOVE, + H_PROTECT, + H_BULK_REMOVE, + H_PUT_TCE, + H_CEDE, +#ifdef CONFIG_KVM_XICS + H_XIRR, + H_CPPR, + H_EOI, + H_IPI, + H_IPOLL, + H_XIRR_X, +#endif + 0 +}; + +void kvmppc_pr_init_default_hcalls(struct kvm *kvm) +{ + int i; + + for (i = 0; default_hcall_list[i]; ++i) + __set_bit(default_hcall_list[i] / 4, kvm->arch.enabled_hcalls); +} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 61c738a..3222a4d 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -387,6 +387,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_UNSET_IRQ: case KVM_CAP_PPC_IRQ_LEVEL: case KVM_CAP_ENABLE_CAP: + case KVM_CAP_ENABLE_CAP_VM: case KVM_CAP_ONE_REG: case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: @@ -417,6 +418,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_ALLOC_HTAB: case KVM_CAP_PPC_RTAS: case KVM_CAP_PPC_FIXUP_HCALL: + case KVM_CAP_PPC_ENABLE_HCALL: #ifdef CONFIG_KVM_XICS case KVM_CAP_IRQ_XICS: #endif @@ -1099,6 +1101,40 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, return 0; } + +static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, + struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER + case KVM_CAP_PPC_ENABLE_HCALL: { + unsigned long hcall = cap->args[0]; + + r = -EINVAL; + if (hcall > MAX_HCALL_OPCODE || (hcall & 3) || + cap->args[1] > 1) + break; + if (cap->args[1]) + set_bit(hcall / 4, kvm->arch.enabled_hcalls); + else + clear_bit(hcall / 4, kvm->arch.enabled_hcalls); + r = 0; + break; + } +#endif + default: + r = -EINVAL; + break; + } + + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1118,6 +1154,15 @@ long kvm_arch_vm_ioctl(struct file *filp, break; } + case KVM_ENABLE_CAP: + { + struct kvm_enable_cap cap; + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + goto out; + r = kvm_vm_ioctl_enable_cap(kvm, &cap); + break; + } #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CREATE_SPAPR_TCE: { struct kvm_create_spapr_tce create_tce; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e11d8f1..0418b74 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -758,6 +758,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_VM_ATTRIBUTES 101 #define KVM_CAP_ARM_PSCI_0_2 102 #define KVM_CAP_PPC_FIXUP_HCALL 103 +#define KVM_CAP_PPC_ENABLE_HCALL 104 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v0.10.2 From ae2113a4f1a6cd5a3cd3d75f394547922758e9ac Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 2 Jun 2014 11:03:00 +1000 Subject: KVM: PPC: Book3S: Allow only implemented hcalls to be enabled or disabled This adds code to check that when the KVM_CAP_PPC_ENABLE_HCALL capability is used to enable or disable in-kernel handling of an hcall, that the hcall is actually implemented by the kernel. If not an EINVAL error is returned. This also checks the default-enabled list of hcalls and prints a warning if any hcall there is not actually implemented. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 5c54d19..6955318 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3039,3 +3039,7 @@ not to attempt to handle the hcall, but will always exit to userspace to handle it. 
Note that it may not make sense to enable some and disable others of a group of related hcalls, but KVM does not prevent userspace from doing that. + +If the hcall number specified is not one that has an in-kernel +implementation, the KVM_ENABLE_CAP ioctl will fail with an EINVAL +error. diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 052ab2a..ceb70aa 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -146,6 +146,7 @@ extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache * extern int kvmppc_mmu_hpte_sysinit(void); extern void kvmppc_mmu_hpte_sysexit(void); extern int kvmppc_mmu_hv_init(void); +extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc); extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); @@ -188,6 +189,8 @@ extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst); extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst); extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd); extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm); +extern int kvmppc_hcall_impl_pr(unsigned long cmd); +extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd); extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, struct kvm_vcpu *vcpu); extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 9c89cdd..e2fd5a1 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -228,7 +228,7 @@ struct kvmppc_ops { void (*fast_vcpu_kick)(struct kvm_vcpu *vcpu); long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl, unsigned long arg); - + int (*hcall_implemented)(unsigned long hcall); }; extern struct kvmppc_ops *kvmppc_hv_ops; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 90aa5c7..bd75902 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -925,6 +925,11 @@ int kvmppc_core_check_processor_compat(void) return 0; } +int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall) +{ + return kvm->arch.kvm_ops->hcall_implemented(hcall); +} + static int kvmppc_book3s_init(void) { int r; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index cf445d2..c4377c7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -645,6 +645,28 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +static int kvmppc_hcall_impl_hv(unsigned long cmd) +{ + switch (cmd) { + case H_CEDE: + case H_PROD: + case H_CONFER: + case H_REGISTER_VPA: +#ifdef CONFIG_KVM_XICS + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + case H_IPOLL: + case H_XIRR_X: +#endif + return 1; + } + + /* See if it's in the real-mode table */ + return kvmppc_hcall_impl_hv_realmode(cmd); +} + static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, struct task_struct *tsk) { @@ -2451,9 +2473,13 @@ static unsigned int default_hcall_list[] = { static void init_default_hcalls(void) { int i; + unsigned int hcall; - for (i = 0; default_hcall_list[i]; ++i) - __set_bit(default_hcall_list[i] / 4, default_enabled_hcalls); + for (i = 0; default_hcall_list[i]; ++i) { + hcall = default_hcall_list[i]; + WARN_ON(!kvmppc_hcall_impl_hv(hcall)); + 
__set_bit(hcall / 4, default_enabled_hcalls); + } } static struct kvmppc_ops kvm_ops_hv = { @@ -2488,6 +2514,7 @@ static struct kvmppc_ops kvm_ops_hv = { .emulate_mfspr = kvmppc_core_emulate_mfspr_hv, .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv, .arch_vm_ioctl = kvm_arch_vm_ioctl_hv, + .hcall_implemented = kvmppc_hcall_impl_hv, }; static int kvmppc_book3s_init_hv(void) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7cde8a6..3b41447 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -212,3 +212,16 @@ bool kvm_hv_mode_active(void) { return atomic_read(&hv_vm_count) != 0; } + +extern int hcall_real_table[], hcall_real_table_end[]; + +int kvmppc_hcall_impl_hv_realmode(unsigned long cmd) +{ + cmd /= 4; + if (cmd < hcall_real_table_end - hcall_real_table && + hcall_real_table[cmd]) + return 1; + + return 0; +} +EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 33aaade..e66c1e38 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2042,6 +2042,7 @@ hcall_real_table: .long 0 /* 0x12c */ .long 0 /* 0x130 */ .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table + .globl hcall_real_table_end hcall_real_table_end: ignore_hdec: diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 123ac7d..15fd6c2 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1670,6 +1670,9 @@ static struct kvmppc_ops kvm_ops_pr = { .emulate_mfspr = kvmppc_core_emulate_mfspr_pr, .fast_vcpu_kick = kvm_vcpu_kick, .arch_vm_ioctl = kvm_arch_vm_ioctl_pr, +#ifdef CONFIG_PPC_BOOK3S_64 + .hcall_implemented = kvmppc_hcall_impl_pr, +#endif }; diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index eacaa6e..6d0143f 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -309,6 +309,27 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) return EMULATE_FAIL; } +int kvmppc_hcall_impl_pr(unsigned long cmd) +{ + switch (cmd) { + case H_ENTER: + case H_REMOVE: + case H_PROTECT: + case H_BULK_REMOVE: + case H_PUT_TCE: + case H_CEDE: +#ifdef CONFIG_KVM_XICS + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + case H_IPOLL: + case H_XIRR_X: +#endif + return 1; + } + return 0; +} /* * List of hcall numbers to enable by default. 
@@ -337,7 +358,11 @@ static unsigned int default_hcall_list[] = { void kvmppc_pr_init_default_hcalls(struct kvm *kvm) { int i; + unsigned int hcall; - for (i = 0; default_hcall_list[i]; ++i) - __set_bit(default_hcall_list[i] / 4, kvm->arch.enabled_hcalls); + for (i = 0; default_hcall_list[i]; ++i) { + hcall = default_hcall_list[i]; + WARN_ON(!kvmppc_hcall_impl_pr(hcall)); + __set_bit(hcall / 4, kvm->arch.enabled_hcalls); + } } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3222a4d..7efc2b7 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1119,6 +1119,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, if (hcall > MAX_HCALL_OPCODE || (hcall & 3) || cap->args[1] > 1) break; + if (!kvmppc_book3s_hcall_implemented(kvm, hcall)) + break; if (cap->args[1]) set_bit(hcall / 4, kvm->arch.enabled_hcalls); else -- cgit v0.10.2 From 9642382e826066c2d30d1b23d1b45410cdd8e07d Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 2 Jun 2014 11:03:01 +1000 Subject: KVM: PPC: Book3S HV: Add H_SET_MODE hcall handling This adds support for the H_SET_MODE hcall. This hcall is a multiplexer that has several functions, some of which are called rarely, and some which are potentially called very frequently. Here we add support for the functions that set the debug registers CIABR (Completed Instruction Address Breakpoint Register) and DAWR/DAWRX (Data Address Watchpoint Register and eXtension), since they could be updated by the guest as often as every context switch. This also adds a kvmppc_power8_compatible() function to test to see if a guest is compatible with POWER8 or not. The CIABR and DAWR/X only exist on POWER8. Signed-off-by: Michael Neuling Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 5dbbb29..85bc8c0 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -279,6 +279,12 @@ #define H_GET_24X7_DATA 0xF07C #define H_GET_PERF_COUNTER_INFO 0xF080 +/* Values for 2nd argument to H_SET_MODE */ +#define H_SET_MODE_RESOURCE_SET_CIABR 1 +#define H_SET_MODE_RESOURCE_SET_DAWR 2 +#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 +#define H_SET_MODE_RESOURCE_LE 4 + #ifndef __ASSEMBLY__ /** diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c4377c7..7db9df2 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -557,6 +557,48 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, vcpu->arch.dtl.dirty = true; } +static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207) + return true; + if ((!vcpu->arch.vcore->arch_compat) && + cpu_has_feature(CPU_FTR_ARCH_207S)) + return true; + return false; +} + +static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, + unsigned long resource, unsigned long value1, + unsigned long value2) +{ + switch (resource) { + case H_SET_MODE_RESOURCE_SET_CIABR: + if (!kvmppc_power8_compatible(vcpu)) + return H_P2; + if (value2) + return H_P4; + if (mflags) + return H_UNSUPPORTED_FLAG_START; + /* Guests can't breakpoint the hypervisor */ + if ((value1 & CIABR_PRIV) == CIABR_PRIV_HYPER) + return H_P3; + vcpu->arch.ciabr = value1; + return H_SUCCESS; + case H_SET_MODE_RESOURCE_SET_DAWR: + if (!kvmppc_power8_compatible(vcpu)) + return H_P2; + if (mflags) + return H_UNSUPPORTED_FLAG_START; + if (value2 & DABRX_HYP) + return H_P4; + vcpu->arch.dawr = value1; + 
vcpu->arch.dawrx = value2; + return H_SUCCESS; + default: + return H_TOO_HARD; + } +} + int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) { unsigned long req = kvmppc_get_gpr(vcpu, 3); @@ -626,7 +668,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) /* Send the error out to userspace via KVM_RUN */ return rc; - + case H_SET_MODE: + ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5), + kvmppc_get_gpr(vcpu, 6), + kvmppc_get_gpr(vcpu, 7)); + if (ret == H_TOO_HARD) + return RESUME_HOST; + break; case H_XIRR: case H_CPPR: case H_EOI: @@ -652,6 +701,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd) case H_PROD: case H_CONFER: case H_REGISTER_VPA: + case H_SET_MODE: #ifdef CONFIG_KVM_XICS case H_XIRR: case H_CPPR: -- cgit v0.10.2 From d57cef91a0c30d3439a4d235eb94ab9efbf797a0 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Mon, 30 Jun 2014 15:54:58 +0300 Subject: KVM: PPC: e500: Fix default tlb for victim hint Tlb search operation used for victim hint relies on the default tlb set by the host. When hardware tablewalk support is enabled in the host, the default tlb is TLB1 which leads KVM to evict the bolted entry. Set and restore the default tlb when searching for victim hint. Signed-off-by: Mihai Caraman Reviewed-by: Scott Wood Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index d0918e0..8d24f78 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -40,7 +40,9 @@ /* MAS registers bit definitions */ -#define MAS0_TLBSEL(x) (((x) << 28) & 0x30000000) +#define MAS0_TLBSEL_MASK 0x30000000 +#define MAS0_TLBSEL_SHIFT 28 +#define MAS0_TLBSEL(x) (((x) << MAS0_TLBSEL_SHIFT) & MAS0_TLBSEL_MASK) #define MAS0_ESEL_MASK 0x0FFF0000 #define MAS0_ESEL_SHIFT 16 #define MAS0_ESEL(x) (((x) << MAS0_ESEL_SHIFT) & MAS0_ESEL_MASK) @@ -86,6 +88,7 @@ #define MAS3_SPSIZE 0x0000003e #define MAS3_SPSIZE_SHIFT 1 +#define MAS4_TLBSEL_MASK MAS0_TLBSEL_MASK #define MAS4_TLBSELD(x) MAS0_TLBSEL(x) #define MAS4_INDD 0x00008000 /* Default IND */ #define MAS4_TSIZED(x) MAS1_TSIZE(x) diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index dd2cc03..79677d7 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -107,11 +107,15 @@ static u32 get_host_mas0(unsigned long eaddr) { unsigned long flags; u32 mas0; + u32 mas4; local_irq_save(flags); mtspr(SPRN_MAS6, 0); + mas4 = mfspr(SPRN_MAS4); + mtspr(SPRN_MAS4, mas4 & ~MAS4_TLBSEL_MASK); asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET)); mas0 = mfspr(SPRN_MAS0); + mtspr(SPRN_MAS4, mas4); local_irq_restore(flags); return mas0; -- cgit v0.10.2 From 8f6822c4b9fac6e47414d2f1e11dbabda9bc2163 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 10:07:40 +0200 Subject: PPC: Add asm helpers for BE 32bit load/store From assembly code we might not only have to explicitly BE access 64bit values, but sometimes also 32bit ones. Add helpers that allow for easy use of lwzx/stwx in their respective byte-reverse or native form. 
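For comparison, a sketch (not part of the patch) of the C-side analogue of these helpers: the familiar endian accessors are likewise a plain access on big-endian hosts and a byte swap on little-endian ones, so the bytes in memory stay big endian either way.

#include <linux/types.h>
#include <asm/byteorder.h>

/* What LWZX_BE/STWX_BE provide for assembly, expressed in C. */
static inline u32 load_shared_be32(const __be32 *p)
{
	return be32_to_cpu(*p);		/* no-op on BE hosts, byte swap on LE hosts */
}

static inline void store_shared_be32(__be32 *p, u32 val)
{
	*p = cpu_to_be32(val);		/* stored bytes are big endian on any host */
}
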
Signed-off-by: Alexander Graf CC: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index 4b237aa..21be8ae 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -34,10 +34,14 @@ #define PPC_MIN_STKFRM 112 #ifdef __BIG_ENDIAN__ +#define LWZX_BE stringify_in_c(lwzx) #define LDX_BE stringify_in_c(ldx) +#define STWX_BE stringify_in_c(stwx) #define STDX_BE stringify_in_c(stdx) #else +#define LWZX_BE stringify_in_c(lwbrx) #define LDX_BE stringify_in_c(ldbrx) +#define STWX_BE stringify_in_c(stwbrx) #define STDX_BE stringify_in_c(stdbrx) #endif -- cgit v0.10.2 From 6f22bd3265fb542acb2697026b953ec07298242d Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 10:16:06 +0200 Subject: KVM: PPC: Book3S HV: Make HTAB code LE host aware When running on an LE host all data structures are kept in little endian byte order. However, the HTAB still needs to be maintained in big endian. So every time we access any HTAB we need to make sure we do so in the right byte order. Fix up all accesses to manually byte swap. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index ceb70aa..8ac5392 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -162,9 +162,9 @@ extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, bool *writable); extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, unsigned long *rmap, long pte_index, int realmode); -extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, +extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index); -void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, +void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index); extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, unsigned long *nb_ret); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index c7871f3..e504f88 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -59,20 +59,29 @@ extern unsigned long kvm_rma_pages; /* These bits are reserved in the guest view of the HPTE */ #define HPTE_GR_RESERVED HPTE_GR_MODIFIED -static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits) +static inline long try_lock_hpte(__be64 *hpte, unsigned long bits) { unsigned long tmp, old; + __be64 be_lockbit, be_bits; + + /* + * We load/store in native endian, but the HTAB is in big endian. If + * we byte swap all data we apply on the PTE we're implicitly correct + * again. + */ + be_lockbit = cpu_to_be64(HPTE_V_HVLOCK); + be_bits = cpu_to_be64(bits); asm volatile(" ldarx %0,0,%2\n" " and. %1,%0,%3\n" " bne 2f\n" - " ori %0,%0,%4\n" + " or %0,%0,%4\n" " stdcx. 
%0,0,%2\n" " beq+ 2f\n" " mr %1,%3\n" "2: isync" : "=&r" (tmp), "=&r" (old) - : "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK) + : "r" (hpte), "r" (be_bits), "r" (be_lockbit) : "cc", "memory"); return old == 0; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 8056107..2d154d9 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -450,7 +450,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned long slb_v; unsigned long pp, key; unsigned long v, gr; - unsigned long *hptep; + __be64 *hptep; int index; int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); @@ -473,13 +473,13 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, preempt_enable(); return -ENOENT; } - hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); - v = hptep[0] & ~HPTE_V_HVLOCK; + hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); + v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; gr = kvm->arch.revmap[index].guest_rpte; /* Unlock the HPTE */ asm volatile("lwsync" : : : "memory"); - hptep[0] = v; + hptep[0] = cpu_to_be64(v); preempt_enable(); gpte->eaddr = eaddr; @@ -583,7 +583,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr) { struct kvm *kvm = vcpu->kvm; - unsigned long *hptep, hpte[3], r; + unsigned long hpte[3], r; + __be64 *hptep; unsigned long mmu_seq, psize, pte_size; unsigned long gpa_base, gfn_base; unsigned long gpa, gfn, hva, pfn; @@ -606,16 +607,16 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, if (ea != vcpu->arch.pgfault_addr) return RESUME_GUEST; index = vcpu->arch.pgfault_index; - hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); + hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); rev = &kvm->arch.revmap[index]; preempt_disable(); while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) cpu_relax(); - hpte[0] = hptep[0] & ~HPTE_V_HVLOCK; - hpte[1] = hptep[1]; + hpte[0] = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; + hpte[1] = be64_to_cpu(hptep[1]); hpte[2] = r = rev->guest_rpte; asm volatile("lwsync" : : : "memory"); - hptep[0] = hpte[0]; + hptep[0] = cpu_to_be64(hpte[0]); preempt_enable(); if (hpte[0] != vcpu->arch.pgfault_hpte[0] || @@ -731,8 +732,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, preempt_disable(); while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) cpu_relax(); - if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] || - rev->guest_rpte != hpte[2]) + if ((be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK) != hpte[0] || + be64_to_cpu(hptep[1]) != hpte[1] || + rev->guest_rpte != hpte[2]) /* HPTE has been changed under us; let the guest retry */ goto out_unlock; hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; @@ -752,20 +754,20 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; r &= rcbits | ~(HPTE_R_R | HPTE_R_C); - if (hptep[0] & HPTE_V_VALID) { + if (be64_to_cpu(hptep[0]) & HPTE_V_VALID) { /* HPTE was previously valid, so we need to invalidate it */ unlock_rmap(rmap); - hptep[0] |= HPTE_V_ABSENT; + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, index); /* don't lose previous R and C bits */ - r |= hptep[1] & (HPTE_R_R | HPTE_R_C); + r |= be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); } else { kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); } - hptep[1] = r; + hptep[1] = cpu_to_be64(r); eieio(); - 
hptep[0] = hpte[0]; + hptep[0] = cpu_to_be64(hpte[0]); asm volatile("ptesync" : : : "memory"); preempt_enable(); if (page && hpte_is_writable(r)) @@ -784,7 +786,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, return ret; out_unlock: - hptep[0] &= ~HPTE_V_HVLOCK; + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); preempt_enable(); goto out_put; } @@ -860,7 +862,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, { struct revmap_entry *rev = kvm->arch.revmap; unsigned long h, i, j; - unsigned long *hptep; + __be64 *hptep; unsigned long ptel, psize, rcbits; for (;;) { @@ -876,11 +878,11 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, * rmap chain lock. */ i = *rmapp & KVMPPC_RMAP_INDEX; - hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { /* unlock rmap before spinning on the HPTE lock */ unlock_rmap(rmapp); - while (hptep[0] & HPTE_V_HVLOCK) + while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK) cpu_relax(); continue; } @@ -899,14 +901,14 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, /* Now check and modify the HPTE */ ptel = rev[i].guest_rpte; - psize = hpte_page_size(hptep[0], ptel); - if ((hptep[0] & HPTE_V_VALID) && + psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel); + if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && hpte_rpn(ptel, psize) == gfn) { if (kvm->arch.using_mmu_notifiers) - hptep[0] |= HPTE_V_ABSENT; + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, i); /* Harvest R and C */ - rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); + rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; if (rcbits & ~rev[i].guest_rpte) { rev[i].guest_rpte = ptel | rcbits; @@ -914,7 +916,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, } } unlock_rmap(rmapp); - hptep[0] &= ~HPTE_V_HVLOCK; + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); } return 0; } @@ -961,7 +963,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, { struct revmap_entry *rev = kvm->arch.revmap; unsigned long head, i, j; - unsigned long *hptep; + __be64 *hptep; int ret = 0; retry: @@ -977,23 +979,24 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, i = head = *rmapp & KVMPPC_RMAP_INDEX; do { - hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); j = rev[i].forw; /* If this HPTE isn't referenced, ignore it */ - if (!(hptep[1] & HPTE_R_R)) + if (!(be64_to_cpu(hptep[1]) & HPTE_R_R)) continue; if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { /* unlock rmap before spinning on the HPTE lock */ unlock_rmap(rmapp); - while (hptep[0] & HPTE_V_HVLOCK) + while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK) cpu_relax(); goto retry; } /* Now check and modify the HPTE */ - if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { + if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && + (be64_to_cpu(hptep[1]) & HPTE_R_R)) { kvmppc_clear_ref_hpte(kvm, hptep, i); if (!(rev[i].guest_rpte & HPTE_R_R)) { rev[i].guest_rpte |= HPTE_R_R; @@ -1001,7 +1004,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, } ret = 1; } - hptep[0] &= ~HPTE_V_HVLOCK; + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); } while ((i = j) != head); unlock_rmap(rmapp); @@ -1035,7 +1038,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, do { hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4)); j = rev[i].forw; - 
if (hp[1] & HPTE_R_R) + if (be64_to_cpu(hp[1]) & HPTE_R_R) goto out; } while ((i = j) != head); } @@ -1075,7 +1078,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) unsigned long head, i, j; unsigned long n; unsigned long v, r; - unsigned long *hptep; + __be64 *hptep; int npages_dirty = 0; retry: @@ -1091,7 +1094,8 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) i = head = *rmapp & KVMPPC_RMAP_INDEX; do { - hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + unsigned long hptep1; + hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); j = rev[i].forw; /* @@ -1108,29 +1112,30 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) * Otherwise we need to do the tlbie even if C==0 in * order to pick up any delayed writeback of C. */ - if (!(hptep[1] & HPTE_R_C) && - (!hpte_is_writable(hptep[1]) || vcpus_running(kvm))) + hptep1 = be64_to_cpu(hptep[1]); + if (!(hptep1 & HPTE_R_C) && + (!hpte_is_writable(hptep1) || vcpus_running(kvm))) continue; if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { /* unlock rmap before spinning on the HPTE lock */ unlock_rmap(rmapp); - while (hptep[0] & HPTE_V_HVLOCK) + while (hptep[0] & cpu_to_be64(HPTE_V_HVLOCK)) cpu_relax(); goto retry; } /* Now check and modify the HPTE */ - if (!(hptep[0] & HPTE_V_VALID)) + if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) continue; /* need to make it temporarily absent so C is stable */ - hptep[0] |= HPTE_V_ABSENT; + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, i); - v = hptep[0]; - r = hptep[1]; + v = be64_to_cpu(hptep[0]); + r = be64_to_cpu(hptep[1]); if (r & HPTE_R_C) { - hptep[1] = r & ~HPTE_R_C; + hptep[1] = cpu_to_be64(r & ~HPTE_R_C); if (!(rev[i].guest_rpte & HPTE_R_C)) { rev[i].guest_rpte |= HPTE_R_C; note_hpte_modification(kvm, &rev[i]); @@ -1143,7 +1148,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) } v &= ~(HPTE_V_ABSENT | HPTE_V_HVLOCK); v |= HPTE_V_VALID; - hptep[0] = v; + hptep[0] = cpu_to_be64(v); } while ((i = j) != head); unlock_rmap(rmapp); @@ -1307,7 +1312,7 @@ struct kvm_htab_ctx { * Returns 1 if this HPT entry has been modified or has pending * R/C bit changes. 
*/ -static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp) +static int hpte_dirty(struct revmap_entry *revp, __be64 *hptp) { unsigned long rcbits_unset; @@ -1316,13 +1321,14 @@ static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp) /* Also need to consider changes in reference and changed bits */ rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); - if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset)) + if ((be64_to_cpu(hptp[0]) & HPTE_V_VALID) && + (be64_to_cpu(hptp[1]) & rcbits_unset)) return 1; return 0; } -static long record_hpte(unsigned long flags, unsigned long *hptp, +static long record_hpte(unsigned long flags, __be64 *hptp, unsigned long *hpte, struct revmap_entry *revp, int want_valid, int first_pass) { @@ -1337,10 +1343,10 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, return 0; valid = 0; - if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) { + if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) { valid = 1; if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && - !(hptp[0] & HPTE_V_BOLTED)) + !(be64_to_cpu(hptp[0]) & HPTE_V_BOLTED)) valid = 0; } if (valid != want_valid) @@ -1352,7 +1358,7 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, preempt_disable(); while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) cpu_relax(); - v = hptp[0]; + v = be64_to_cpu(hptp[0]); /* re-evaluate valid and dirty from synchronized HPTE value */ valid = !!(v & HPTE_V_VALID); @@ -1360,9 +1366,9 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, /* Harvest R and C into guest view if necessary */ rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); - if (valid && (rcbits_unset & hptp[1])) { - revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) | - HPTE_GR_MODIFIED; + if (valid && (rcbits_unset & be64_to_cpu(hptp[1]))) { + revp->guest_rpte |= (be64_to_cpu(hptp[1]) & + (HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED; dirty = 1; } @@ -1381,13 +1387,13 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, revp->guest_rpte = r; } asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); - hptp[0] &= ~HPTE_V_HVLOCK; + hptp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); preempt_enable(); if (!(valid == want_valid && (first_pass || dirty))) ok = 0; } - hpte[0] = v; - hpte[1] = r; + hpte[0] = cpu_to_be64(v); + hpte[1] = cpu_to_be64(r); return ok; } @@ -1397,7 +1403,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, struct kvm_htab_ctx *ctx = file->private_data; struct kvm *kvm = ctx->kvm; struct kvm_get_htab_header hdr; - unsigned long *hptp; + __be64 *hptp; struct revmap_entry *revp; unsigned long i, nb, nw; unsigned long __user *lbuf; @@ -1413,7 +1419,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, flags = ctx->flags; i = ctx->index; - hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); revp = kvm->arch.revmap + i; lbuf = (unsigned long __user *)buf; @@ -1497,7 +1503,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, unsigned long i, j; unsigned long v, r; unsigned long __user *lbuf; - unsigned long *hptp; + __be64 *hptp; unsigned long tmp[2]; ssize_t nb; long int err, ret; @@ -1539,7 +1545,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte) break; - hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); lbuf = (unsigned long __user *)buf; for 
(j = 0; j < hdr.n_valid; ++j) { err = -EFAULT; @@ -1551,7 +1557,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, lbuf += 2; nb += HPTE_SIZE; - if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) kvmppc_do_h_remove(kvm, 0, i, 0, tmp); err = -EIO; ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r, @@ -1577,7 +1583,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, } for (j = 0; j < hdr.n_invalid; ++j) { - if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) kvmppc_do_h_remove(kvm, 0, i, 0, tmp); ++i; hptp += 2; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 6e62243..e5c6063 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -154,10 +154,10 @@ static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva, return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift); } -static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v) +static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v) { asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); - hpte[0] = hpte_v; + hpte[0] = cpu_to_be64(hpte_v); } long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, @@ -166,7 +166,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, { unsigned long i, pa, gpa, gfn, psize; unsigned long slot_fn, hva; - unsigned long *hpte; + __be64 *hpte; struct revmap_entry *rev; unsigned long g_ptel; struct kvm_memory_slot *memslot; @@ -275,9 +275,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, return H_PARAMETER; if (likely((flags & H_EXACT) == 0)) { pte_index &= ~7UL; - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); for (i = 0; i < 8; ++i) { - if ((*hpte & HPTE_V_VALID) == 0 && + if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 && try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | HPTE_V_ABSENT)) break; @@ -292,11 +292,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, */ hpte -= 16; for (i = 0; i < 8; ++i) { + u64 pte; while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if (!(*hpte & (HPTE_V_VALID | HPTE_V_ABSENT))) + pte = be64_to_cpu(*hpte); + if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT))) break; - *hpte &= ~HPTE_V_HVLOCK; + *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK); hpte += 2; } if (i == 8) @@ -304,14 +306,17 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } pte_index += i; } else { - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | HPTE_V_ABSENT)) { /* Lock the slot and check again */ + u64 pte; + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if (*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)) { - *hpte &= ~HPTE_V_HVLOCK; + pte = be64_to_cpu(*hpte); + if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) { + *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK); return H_PTEG_FULL; } } @@ -347,11 +352,11 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } } - hpte[1] = ptel; + hpte[1] = cpu_to_be64(ptel); /* Write the first HPTE dword, unlocking the HPTE and making it valid */ eieio(); - hpte[0] = pteh; + hpte[0] = cpu_to_be64(pteh); asm volatile("ptesync" : : : "memory"); *pte_idx_ret = pte_index; @@ -468,30 +473,35 @@ long kvmppc_do_h_remove(struct kvm *kvm, 
unsigned long flags, unsigned long pte_index, unsigned long avpn, unsigned long *hpret) { - unsigned long *hpte; + __be64 *hpte; unsigned long v, r, rb; struct revmap_entry *rev; + u64 pte; if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || - ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) || - ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) { - hpte[0] &= ~HPTE_V_HVLOCK; + pte = be64_to_cpu(hpte[0]); + if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || + ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) || + ((flags & H_ANDCOND) && (pte & avpn) != 0)) { + hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); return H_NOT_FOUND; } rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); - v = hpte[0] & ~HPTE_V_HVLOCK; + v = pte & ~HPTE_V_HVLOCK; if (v & HPTE_V_VALID) { - hpte[0] &= ~HPTE_V_VALID; - rb = compute_tlbie_rb(v, hpte[1], pte_index); + u64 pte1; + + pte1 = be64_to_cpu(hpte[1]); + hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); + rb = compute_tlbie_rb(v, pte1, pte_index); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* Read PTE low word after tlbie to get final R/C values */ - remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]); + remove_revmap_chain(kvm, pte_index, rev, v, pte1); } r = rev->guest_rpte & ~HPTE_GR_RESERVED; note_hpte_modification(kvm, rev); @@ -514,12 +524,14 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; unsigned long *args = &vcpu->arch.gpr[4]; - unsigned long *hp, *hptes[4], tlbrb[4]; + __be64 *hp, *hptes[4]; + unsigned long tlbrb[4]; long int i, j, k, n, found, indexes[4]; unsigned long flags, req, pte_index, rcbits; int global; long int ret = H_SUCCESS; struct revmap_entry *rev, *revs[4]; + u64 hp0; global = global_invalidates(kvm, 0); for (i = 0; i < 4 && ret == H_SUCCESS; ) { @@ -542,8 +554,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) ret = H_PARAMETER; break; } - hp = (unsigned long *) - (kvm->arch.hpt_virt + (pte_index << 4)); + hp = (__be64 *) (kvm->arch.hpt_virt + (pte_index << 4)); /* to avoid deadlock, don't spin except for first */ if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) { if (n) @@ -552,23 +563,24 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) cpu_relax(); } found = 0; - if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) { + hp0 = be64_to_cpu(hp[0]); + if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) { switch (flags & 3) { case 0: /* absolute */ found = 1; break; case 1: /* andcond */ - if (!(hp[0] & args[j + 1])) + if (!(hp0 & args[j + 1])) found = 1; break; case 2: /* AVPN */ - if ((hp[0] & ~0x7fUL) == args[j + 1]) + if ((hp0 & ~0x7fUL) == args[j + 1]) found = 1; break; } } if (!found) { - hp[0] &= ~HPTE_V_HVLOCK; + hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); args[j] = ((0x90 | flags) << 56) + pte_index; continue; } @@ -577,7 +589,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); note_hpte_modification(kvm, rev); - if (!(hp[0] & HPTE_V_VALID)) { + if (!(hp0 & HPTE_V_VALID)) { /* insert R and C bits from PTE */ rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); args[j] |= rcbits << (56 - 5); @@ -585,8 +597,10 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) continue; } - hp[0] &= ~HPTE_V_VALID; /* leave it locked */ - tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index); + /* leave it locked */ + hp[0] &= 
~cpu_to_be64(HPTE_V_VALID); + tlbrb[n] = compute_tlbie_rb(be64_to_cpu(hp[0]), + be64_to_cpu(hp[1]), pte_index); indexes[n] = j; hptes[n] = hp; revs[n] = rev; @@ -605,7 +619,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) pte_index = args[j] & ((1ul << 56) - 1); hp = hptes[k]; rev = revs[k]; - remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]); + remove_revmap_chain(kvm, pte_index, rev, + be64_to_cpu(hp[0]), be64_to_cpu(hp[1])); rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); args[j] |= rcbits << (56 - 5); hp[0] = 0; @@ -620,23 +635,25 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long va) { struct kvm *kvm = vcpu->kvm; - unsigned long *hpte; + __be64 *hpte; struct revmap_entry *rev; unsigned long v, r, rb, mask, bits; + u64 pte; if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || - ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) { - hpte[0] &= ~HPTE_V_HVLOCK; + pte = be64_to_cpu(hpte[0]); + if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || + ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) { + hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); return H_NOT_FOUND; } - v = hpte[0]; + v = pte; bits = (flags << 55) & HPTE_R_PP0; bits |= (flags << 48) & HPTE_R_KEY_HI; bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); @@ -650,12 +667,12 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, rev->guest_rpte = r; note_hpte_modification(kvm, rev); } - r = (hpte[1] & ~mask) | bits; + r = (be64_to_cpu(hpte[1]) & ~mask) | bits; /* Update HPTE */ if (v & HPTE_V_VALID) { rb = compute_tlbie_rb(v, r, pte_index); - hpte[0] = v & ~HPTE_V_VALID; + hpte[0] = cpu_to_be64(v & ~HPTE_V_VALID); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* * If the host has this page as readonly but the guest @@ -681,9 +698,9 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, } } } - hpte[1] = r; + hpte[1] = cpu_to_be64(r); eieio(); - hpte[0] = v & ~HPTE_V_HVLOCK; + hpte[0] = cpu_to_be64(v & ~HPTE_V_HVLOCK); asm volatile("ptesync" : : : "memory"); return H_SUCCESS; } @@ -692,7 +709,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long pte_index) { struct kvm *kvm = vcpu->kvm; - unsigned long *hpte, v, r; + __be64 *hpte; + unsigned long v, r; int i, n = 1; struct revmap_entry *rev = NULL; @@ -704,9 +722,9 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, } rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); for (i = 0; i < n; ++i, ++pte_index) { - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); - v = hpte[0] & ~HPTE_V_HVLOCK; - r = hpte[1]; + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); + v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; + r = be64_to_cpu(hpte[1]); if (v & HPTE_V_ABSENT) { v &= ~HPTE_V_ABSENT; v |= HPTE_V_VALID; @@ -721,25 +739,27 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, return H_SUCCESS; } -void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, +void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index) { unsigned long rb; - hptep[0] &= ~HPTE_V_VALID; - rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); + hptep[0] &= ~cpu_to_be64(HPTE_V_VALID); + rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]), + pte_index); do_tlbies(kvm, &rb, 1, 
1, true); } EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); -void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, +void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index) { unsigned long rb; unsigned char rbyte; - rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); - rbyte = (hptep[1] & ~HPTE_R_R) >> 8; + rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]), + pte_index); + rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8; /* modify only the second-last byte, which contains the ref bit */ *((char *)hptep + 14) = rbyte; do_tlbies(kvm, &rb, 1, 1, false); @@ -765,7 +785,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, unsigned long somask; unsigned long vsid, hash; unsigned long avpn; - unsigned long *hpte; + __be64 *hpte; unsigned long mask, val; unsigned long v, r; @@ -797,11 +817,11 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, val |= avpn; for (;;) { - hpte = (unsigned long *)(kvm->arch.hpt_virt + (hash << 7)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (hash << 7)); for (i = 0; i < 16; i += 2) { /* Read the PTE racily */ - v = hpte[i] & ~HPTE_V_HVLOCK; + v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; /* Check valid/absent, hash, segment size and AVPN */ if (!(v & valid) || (v & mask) != val) @@ -810,8 +830,8 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, /* Lock the PTE and read it under the lock */ while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK)) cpu_relax(); - v = hpte[i] & ~HPTE_V_HVLOCK; - r = hpte[i+1]; + v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; + r = be64_to_cpu(hpte[i+1]); /* * Check the HPTE again, including large page size @@ -825,7 +845,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, return (hash << 3) + (i >> 1); /* Unlock and move on */ - hpte[i] = v; + hpte[i] = cpu_to_be64(v); } if (val & HPTE_V_SECONDARY) @@ -854,7 +874,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, struct kvm *kvm = vcpu->kvm; long int index; unsigned long v, r, gr; - unsigned long *hpte; + __be64 *hpte; unsigned long valid; struct revmap_entry *rev; unsigned long pp, key; @@ -870,9 +890,9 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, return status; /* there really was no HPTE */ return 0; /* for prot fault, HPTE disappeared */ } - hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); - v = hpte[0] & ~HPTE_V_HVLOCK; - r = hpte[1]; + hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); + v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; + r = be64_to_cpu(hpte[1]); rev = real_vmalloc_addr(&kvm->arch.revmap[index]); gr = rev->guest_rpte; -- cgit v0.10.2 From 02407552256111479fbfd23a3e01218b399aaa35 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 10:34:19 +0200 Subject: KVM: PPC: Book3S HV: Access guest VPA in BE There are a few shared data structures between the host and the guest. Most of them get registered through the VPA interface. These data structures are defined to always be in big endian byte order, so let's make sure we always access them in big endian. 
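As an illustration of why this matters, assuming the yield count is declared as a 32-bit big-endian field in the lppaca definition: a plain store of 1 on a little-endian host leaves the bytes in native order, so the guest, which interprets the field as big endian, reads back 0x01000000. The hunks below therefore wrap such accesses in cpu_to_be*()/be*_to_cpu().

#include <asm/lppaca.h>
#include <asm/byteorder.h>

static void init_vpa_yield_count(struct lppaca *vpa)
{
	/*
	 * The VPA is architecturally big endian. On a little-endian host
	 * "vpa->yield_count = 1" would store the bytes 01 00 00 00, which
	 * the guest reads as 0x01000000; converting explicitly keeps the
	 * guest-visible value at 1 on either host endianness.
	 */
	vpa->yield_count = cpu_to_be32(1);
}
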
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7db9df2..f1281c4 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -272,7 +272,7 @@ struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa) { vpa->__old_status |= LPPACA_OLD_SHARED_PROC; - vpa->yield_count = 1; + vpa->yield_count = cpu_to_be32(1); } static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v, @@ -295,8 +295,8 @@ static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v, struct reg_vpa { u32 dummy; union { - u16 hword; - u32 word; + __be16 hword; + __be32 word; } length; }; @@ -335,9 +335,9 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, if (va == NULL) return H_PARAMETER; if (subfunc == H_VPA_REG_VPA) - len = ((struct reg_vpa *)va)->length.hword; + len = be16_to_cpu(((struct reg_vpa *)va)->length.hword); else - len = ((struct reg_vpa *)va)->length.word; + len = be32_to_cpu(((struct reg_vpa *)va)->length.word); kvmppc_unpin_guest_page(kvm, va, vpa, false); /* Check length */ @@ -542,18 +542,18 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, return; memset(dt, 0, sizeof(struct dtl_entry)); dt->dispatch_reason = 7; - dt->processor_id = vc->pcpu + vcpu->arch.ptid; - dt->timebase = now + vc->tb_offset; - dt->enqueue_to_dispatch_time = stolen; - dt->srr0 = kvmppc_get_pc(vcpu); - dt->srr1 = vcpu->arch.shregs.msr; + dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid); + dt->timebase = cpu_to_be64(now + vc->tb_offset); + dt->enqueue_to_dispatch_time = cpu_to_be32(stolen); + dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu)); + dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr); ++dt; if (dt == vcpu->arch.dtl.pinned_end) dt = vcpu->arch.dtl.pinned_addr; vcpu->arch.dtl_ptr = dt; /* order writing *dt vs. writing vpa->dtl_idx */ smp_wmb(); - vpa->dtl_idx = ++vcpu->arch.dtl_index; + vpa->dtl_idx = cpu_to_be64(++vcpu->arch.dtl_index); vcpu->arch.dtl.dirty = true; } diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 3a5c568..d562c8e 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -45,14 +45,14 @@ static void reload_slb(struct kvm_vcpu *vcpu) return; /* Sanity check */ - n = min_t(u32, slb->persistent, SLB_MIN_SIZE); + n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE); if ((void *) &slb->save_area[n] > vcpu->arch.slb_shadow.pinned_end) return; /* Load up the SLB from that */ for (i = 0; i < n; ++i) { - unsigned long rb = slb->save_area[i].esid; - unsigned long rs = slb->save_area[i].vsid; + unsigned long rb = be64_to_cpu(slb->save_area[i].esid); + unsigned long rs = be64_to_cpu(slb->save_area[i].vsid); rb = (rb & ~0xFFFul) | i; /* insert entry number */ asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb)); -- cgit v0.10.2 From 0865a583a4881975cc4b621f4886c02f01600302 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 10:36:17 +0200 Subject: KVM: PPC: Book3S HV: Access host lppaca and shadow slb in BE Some data structures are always stored in big endian. Among those are the LPPACA fields as well as the shadow slb. These structures might be shared with a hypervisor. So whenever we access those fields, make sure we do so in big endian byte order. 
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index e66c1e38..bf5270e 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -32,10 +32,6 @@ #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) -#ifdef __LITTLE_ENDIAN__ -#error Need to fix lppaca and SLB shadow accesses in little endian mode -#endif - /* Values in HSTATE_NAPPING(r13) */ #define NAPPING_CEDE 1 #define NAPPING_NOVCPU 2 @@ -595,9 +591,10 @@ kvmppc_got_guest: ld r3, VCPU_VPA(r4) cmpdi r3, 0 beq 25f - lwz r5, LPPACA_YIELDCOUNT(r3) + li r6, LPPACA_YIELDCOUNT + LWZX_BE r5, r3, r6 addi r5, r5, 1 - stw r5, LPPACA_YIELDCOUNT(r3) + STWX_BE r5, r3, r6 li r6, 1 stb r6, VCPU_VPA_DIRTY(r4) 25: @@ -1442,9 +1439,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) ld r8, VCPU_VPA(r9) /* do they have a VPA? */ cmpdi r8, 0 beq 25f - lwz r3, LPPACA_YIELDCOUNT(r8) + li r4, LPPACA_YIELDCOUNT + LWZX_BE r3, r8, r4 addi r3, r3, 1 - stw r3, LPPACA_YIELDCOUNT(r8) + STWX_BE r3, r8, r4 li r3, 1 stb r3, VCPU_VPA_DIRTY(r9) 25: @@ -1757,8 +1755,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 33: ld r8,PACA_SLBSHADOWPTR(r13) .rept SLB_NUM_BOLTED - ld r5,SLBSHADOW_SAVEAREA(r8) - ld r6,SLBSHADOW_SAVEAREA+8(r8) + li r3, SLBSHADOW_SAVEAREA + LDX_BE r5, r8, r3 + addi r3, r3, 8 + LDX_BE r6, r8, r3 andis. r7,r5,SLB_ESID_V@h beq 1f slbmte r6,r5 -- cgit v0.10.2 From 76d072fb05f646eb180f161bbe06ab185af52f38 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 10:37:52 +0200 Subject: KVM: PPC: Book3S HV: Access XICS in BE On the exit path from the guest we check what type of interrupt we received if we received one. This means we're doing hardware access to the XICS interrupt controller. However, when running on a little endian system, this access is byte reversed. So let's make sure to swizzle the bytes back again and virtually make XICS accesses big endian. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index bf5270e..364ca0c 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2350,7 +2350,18 @@ kvmppc_read_intr: cmpdi r6, 0 beq- 1f lwzcix r0, r6, r7 - rlwinm. r3, r0, 0, 0xffffff + /* + * Save XIRR for later. Since we get in in reverse endian on LE + * systems, save it byte reversed and fetch it back in host endian. + */ + li r3, HSTATE_SAVED_XIRR + STWX_BE r0, r3, r13 +#ifdef __LITTLE_ENDIAN__ + lwz r3, HSTATE_SAVED_XIRR(r13) +#else + mr r3, r0 +#endif + rlwinm. r3, r3, 0, 0xffffff sync beq 1f /* if nothing pending in the ICP */ @@ -2382,10 +2393,9 @@ kvmppc_read_intr: li r3, -1 1: blr -42: /* It's not an IPI and it's for the host, stash it in the PACA - * before exit, it will be picked up by the host ICP driver +42: /* It's not an IPI and it's for the host. We saved a copy of XIRR in + * the PACA earlier, it will be picked up by the host ICP driver */ - stw r0, HSTATE_SAVED_XIRR(r13) li r3, 1 b 1b -- cgit v0.10.2 From 9bf163f86d0dc2f9070d9b1b8c27cedcf8eec816 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 16 Jun 2014 14:41:15 +0200 Subject: KVM: PPC: Book3S HV: Fix ABIv2 on LE For code that doesn't live in modules we can just branch to the real function names, giving us compatibility with ABIv1 and ABIv2. Do this for the compiled-in code of HV KVM. 
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 364ca0c..855521e 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -668,9 +668,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) mr r31, r4 addi r3, r31, VCPU_FPRS_TM - bl .load_fp_state + bl load_fp_state addi r3, r31, VCPU_VRS_TM - bl .load_vr_state + bl load_vr_state mr r4, r31 lwz r7, VCPU_VRSAVE_TM(r4) mtspr SPRN_VRSAVE, r7 @@ -1414,9 +1414,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) /* Save FP/VSX. */ addi r3, r9, VCPU_FPRS_TM - bl .store_fp_state + bl store_fp_state addi r3, r9, VCPU_VRS_TM - bl .store_vr_state + bl store_vr_state mfspr r6, SPRN_VRSAVE stw r6, VCPU_VRSAVE_TM(r9) 1: @@ -2430,11 +2430,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) mtmsrd r8 isync addi r3,r3,VCPU_FPRS - bl .store_fp_state + bl store_fp_state #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION addi r3,r31,VCPU_VRS - bl .store_vr_state + bl store_vr_state END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif mfspr r6,SPRN_VRSAVE @@ -2466,11 +2466,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) mtmsrd r8 isync addi r3,r4,VCPU_FPRS - bl .load_fp_state + bl load_fp_state #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION addi r3,r31,VCPU_VRS - bl .load_vr_state + bl load_vr_state END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif lwz r7,VCPU_VRSAVE(r31) -- cgit v0.10.2 From 6947f948f06128409b94306afaca5ece873ee5a2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 10:39:38 +0200 Subject: KVM: PPC: Book3S HV: Enable for little endian hosts Now that we've fixed all the issues that HV KVM code had on little endian hosts, we can enable it in the kernel configuration for users to play with. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index d6a53b9..8aeeda1 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -75,7 +75,6 @@ config KVM_BOOK3S_64 config KVM_BOOK3S_64_HV tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host" depends on KVM_BOOK3S_64 - depends on !CPU_LITTLE_ENDIAN select KVM_BOOK3S_HV_POSSIBLE select MMU_NOTIFIER select CMA -- cgit v0.10.2 From debf27d6b92d7a98e0153ca8e3a990ea7a45b4da Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Fri, 4 Jul 2014 11:17:28 +0300 Subject: KVM: PPC: e500: Emulate power management control SPR For FSL e6500 core the kernel uses power management SPR register (PWRMGTCR0) to enable idle power down for cores and devices by setting up the idle count period at boot time. With the host already controlling the power management configuration the guest could simply benefit from it, so emulate guest request as a general store. 
Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 62b2cee..faf2f0e 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -584,6 +584,7 @@ struct kvm_vcpu_arch { u32 mmucfg; u32 eptcfg; u32 epr; + u32 pwrmgtcr0; u32 crit_save; /* guest debug registers*/ struct debug_reg dbg_reg; diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 002d517..c99c40e 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -250,6 +250,14 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va spr_val); break; + case SPRN_PWRMGTCR0: + /* + * Guest relies on host power management configurations + * Treat the request as a general store + */ + vcpu->arch.pwrmgtcr0 = spr_val; + break; + /* extra exceptions */ case SPRN_IVOR32: vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val; @@ -368,6 +376,10 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v *spr_val = vcpu->arch.eptcfg; break; + case SPRN_PWRMGTCR0: + *spr_val = vcpu->arch.pwrmgtcr0; + break; + /* extra exceptions */ case SPRN_IVOR32: *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; -- cgit v0.10.2 From 1287cb3fa85cd4a0d18402f6a23e1d4c6a9d7b8b Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 4 Jul 2014 12:52:51 +0200 Subject: KVM: PPC: Book3S: Move vcore definition to end of kvm_arch struct When building KVM with a lot of vcores (NR_CPUS is big), we can potentially get out of the ld immediate range for dereferences inside that struct. Move the array to the end of our kvm_arch struct. This fixes compilation issues with NR_CPUS=2048 for me. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index faf2f0e..855ba4d 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -255,7 +255,6 @@ struct kvm_arch { atomic_t hpte_mod_interest; spinlock_t slot_phys_lock; cpumask_t need_tlb_flush; - struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; int hpt_cma_alloc; #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE @@ -273,6 +272,10 @@ struct kvm_arch { struct kvmppc_xics *xics; #endif struct kvmppc_ops *kvm_ops; +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* This array can grow quite large, keep it at the end */ + struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; +#endif }; /* -- cgit v0.10.2 From 17824b5afcf273f6fc3e04df2d2a9d90d5c864fd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 10 Jul 2014 19:19:35 +0200 Subject: KVM: PPC: Deflect page write faults properly in kvmppc_st When we have a page that we're not allowed to write to, xlate() will already tell us -EPERM on lookup of that page. With the code as is we change it into a "page missing" error which a guest may get confused about. Instead, just tell the caller about the -EPERM directly. This fixes Mac OS X guests when run with DCBZ32 emulation. 
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index bd75902..9624c56 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -418,11 +418,13 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) { struct kvmppc_pte pte; + int r; vcpu->stat.st++; - if (kvmppc_xlate(vcpu, *eaddr, data, true, &pte)) - return -ENOENT; + r = kvmppc_xlate(vcpu, *eaddr, data, true, &pte); + if (r < 0) + return r; *eaddr = pte.raddr; -- cgit v0.10.2 From 2e27ecc961044a2c5c05a4283888352961886a87 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 10 Jul 2014 19:22:03 +0200 Subject: KVM: PPC: Book3S: Stop PTE lookup on write errors When a page lookup failed because we're not allowed to write to the page, we should not overwrite that value with another lookup on the second PTEG which will return "page not found". Instead, we should just tell the caller that we had a permission problem. This fixes Mac OS X guests looping endlessly in page lookup code for me. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 93503bb..cd0b073 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -335,7 +335,7 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, if (r < 0) r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, iswrite, true); - if (r < 0) + if (r == -ENOENT) r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, iswrite, false); -- cgit v0.10.2 From c01e3f66cd5cdc1f727f4c7b0c10b3e3bdb91ba7 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 11 Jul 2014 02:58:58 +0200 Subject: KVM: PPC: Book3S: Add hack for split real mode Today we handle split real mode by mapping both instruction and data faults into a special virtual address space that only exists during the split mode phase. This is good enough to catch 32bit Linux guests that use split real mode for copy_from/to_user. In this case we're always prefixed with 0xc0000000 for our instruction pointer and can map the user space process freely below there. However, that approach fails when we're running KVM inside of KVM. Here the 1st level last_inst reader may well be in the same virtual page as a 2nd level interrupt handler. It also fails when running Mac OS X guests. Here we have a 4G/4G split, so a kernel copy_from/to_user implementation can easily overlap with user space addresses. The architecturally correct way to fix this would be to implement an instruction interpreter in KVM that kicks in whenever we go into split real mode. This interpreter however would not receive a great amount of testing and be a lot of bloat for a reasonably isolated corner case. So I went back to the drawing board and tried to come up with a way to make split real mode work with a single flat address space. And then I realized that we could get away with the same trick that makes it work for Linux: Whenever we see an instruction address during split real mode that may collide, we just move it higher up the virtual address space to a place that hopefully does not collide (keep your fingers crossed!). That approach does work surprisingly well. I am able to successfully run Mac OS X guests with KVM and QEMU (no split real mode hacks like MOL) when I apply a tiny timing probe hack to QEMU. I'd say this is a win over even more broken split real mode :). 
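The constants and the round trip are easiest to see in isolation. A sketch using the definitions added below; these helpers only illustrate the idea, the real fixup/unfixup code also tracks BOOK3S_HFLAG_SPLIT_HACK in the vcpu:

	/* Relocate a low split-real-mode PC into the hack window. */
	static ulong split_hack_fixup(ulong pc)
	{
		if (pc & SPLIT_HACK_MASK)	/* not in the fixupable range */
			return pc;
		return pc | SPLIT_HACK_OFFS;	/* e.g. 0x1000 -> 0xfb001000 */
	}

	/* Strip the offset again from any address inside the window. */
	static ulong split_hack_unfixup(ulong addr)
	{
		if ((addr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
			addr &= ~SPLIT_HACK_MASK;
		return addr;
	}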
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 9601741..3f3e530 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -131,6 +131,7 @@ #define BOOK3S_HFLAG_NATIVE_PS 0x8 #define BOOK3S_HFLAG_MULTI_PGSIZE 0x10 #define BOOK3S_HFLAG_NEW_TLBIE 0x20 +#define BOOK3S_HFLAG_SPLIT_HACK 0x40 #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 8ac5392..b1cf18d 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -324,4 +324,7 @@ static inline bool is_kvmppc_resume_guest(int r) /* LPIDs we support with this build -- runtime limit may be lower */ #define KVMPPC_NR_LPIDS (LPID_RSVD + 1) +#define SPLIT_HACK_MASK 0xff000000 +#define SPLIT_HACK_OFFS 0xfb000000 + #endif /* __ASM_KVM_BOOK3S_H__ */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 9624c56..1d13764 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -72,6 +72,17 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) { } +void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { + ulong pc = kvmppc_get_pc(vcpu); + if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) + kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); + vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; + } +} +EXPORT_SYMBOL_GPL(kvmppc_unfixup_split_real); + static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) { if (!is_kvmppc_hv_enabled(vcpu->kvm)) @@ -118,6 +129,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { + kvmppc_unfixup_split_real(vcpu); kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); kvmppc_set_srr1(vcpu, kvmppc_get_msr(vcpu) | flags); kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec); @@ -384,6 +396,13 @@ static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, pte->may_write = true; pte->may_execute = true; r = 0; + + if ((kvmppc_get_msr(vcpu) & (MSR_IR | MSR_DR)) == MSR_DR && + !data) { + if ((vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) && + ((eaddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)) + pte->raddr &= ~SPLIT_HACK_MASK; + } } return r; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 15fd6c2..6125f60 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -62,6 +62,35 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); #define HW_PAGE_SIZE PAGE_SIZE #endif +static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu) +{ + ulong msr = kvmppc_get_msr(vcpu); + return (msr & (MSR_IR|MSR_DR)) == MSR_DR; +} + +static void kvmppc_fixup_split_real(struct kvm_vcpu *vcpu) +{ + ulong msr = kvmppc_get_msr(vcpu); + ulong pc = kvmppc_get_pc(vcpu); + + /* We are in DR only split real mode */ + if ((msr & (MSR_IR|MSR_DR)) != MSR_DR) + return; + + /* We have not fixed up the guest already */ + if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) + return; + + /* The code is in fixupable address space */ + if (pc & SPLIT_HACK_MASK) + return; + + vcpu->arch.hflags |= BOOK3S_HFLAG_SPLIT_HACK; + kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS); +} + +void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu); + static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) { #ifdef 
CONFIG_PPC_BOOK3S_64 @@ -81,6 +110,9 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) #ifdef CONFIG_PPC_BOOK3S_32 current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu; #endif + + if (kvmppc_is_split_real(vcpu)) + kvmppc_fixup_split_real(vcpu); } static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) @@ -95,6 +127,9 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) svcpu_put(svcpu); #endif + if (kvmppc_is_split_real(vcpu)) + kvmppc_unfixup_split_real(vcpu); + kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); @@ -322,6 +357,11 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) } } + if (kvmppc_is_split_real(vcpu)) + kvmppc_fixup_split_real(vcpu); + else + kvmppc_unfixup_split_real(vcpu); + if ((kvmppc_get_msr(vcpu) & (MSR_PR|MSR_IR|MSR_DR)) != (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { kvmppc_mmu_flush_segments(vcpu); @@ -522,6 +562,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12)); break; case MSR_DR: + if (!data && + (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) && + ((pte.raddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)) + pte.raddr &= ~SPLIT_HACK_MASK; + /* fall through */ case MSR_IR: vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); @@ -886,6 +931,9 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, ulong shadow_srr1 = vcpu->arch.shadow_srr1; vcpu->stat.pf_instruc++; + if (kvmppc_is_split_real(vcpu)) + kvmppc_fixup_split_real(vcpu); + #ifdef CONFIG_PPC_BOOK3S_32 /* We set segments as unused segments when invalidating them. So * treat the respective fault as segment fault. */ -- cgit v0.10.2 From 89b68c96a24f6520c8815f88254c8e7d09aeb40e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 13 Jul 2014 16:37:12 +0200 Subject: KVM: PPC: Book3S: Make magic page properly 4k mappable The magic page is defined as a 4k page of per-vCPU data that is shared between the guest and the host to accelerate accesses to privileged registers. However, when the host is using 64k page size granularity we weren't quite as strict about that rule anymore. Instead, we partially treated all of the upper 64k as magic page and mapped only the uppermost 4k with the actual magic contents. This works well enough for Linux which doesn't use any memory in kernel space in the upper 64k, but Mac OS X got upset. So this patch makes magic page actually stay in a 4k range even on 64k page size hosts. This patch fixes magic page usage with Mac OS X (using MOL) on 64k PAGE_SIZE hosts for me. 
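The heart of the fix is an offset-within-64k check (a sketch using the same masks as the hunks below): the 4k magic page and the host's shared page must occupy the same 4k slot inside their respective 64k pages, otherwise a single 64k host mapping cannot line them up.

	/* Sketch: 0xf000 selects the 4k slot within a 64k page. */
	static bool magic_page_slot_matches(ulong magic_page_pa, void *shared)
	{
		return (magic_page_pa & 0xf000) == ((ulong)shared & 0xf000);
	}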
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index b1cf18d..20fb6f2 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -158,7 +158,7 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); -extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, +extern pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, bool *writable); extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, unsigned long *rmap, long pte_index, int realmode); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 1d13764..31facfc 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -354,18 +354,18 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter); -pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, +pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, bool *writable) { - ulong mp_pa = vcpu->arch.magic_page_pa; + ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM; + gfn_t gfn = gpa >> PAGE_SHIFT; if (!(kvmppc_get_msr(vcpu) & MSR_SF)) mp_pa = (uint32_t)mp_pa; /* Magic page override */ - if (unlikely(mp_pa) && - unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) == - ((mp_pa & PAGE_MASK) & KVM_PAM))) { + gpa &= ~0xFFFULL; + if (unlikely(mp_pa) && unlikely((gpa & KVM_PAM) == mp_pa)) { ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; pfn_t pfn; @@ -378,7 +378,7 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, return gfn_to_pfn_prot(vcpu->kvm, gfn, writing, writable); } -EXPORT_SYMBOL_GPL(kvmppc_gfn_to_pfn); +EXPORT_SYMBOL_GPL(kvmppc_gpa_to_pfn); static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, bool iswrite, struct kvmppc_pte *pte) diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 678e753..2035d16 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -156,11 +156,10 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, bool writable; /* Get host physical address for gpa */ - hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT, - iswrite, &writable); + hpaddr = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable); if (is_error_noslot_pfn(hpaddr)) { - printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", - orig_pte->eaddr); + printk(KERN_INFO "Couldn't get guest page for gpa %lx!\n", + orig_pte->raddr); r = -EINVAL; goto out; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 0ac9839..b982d92 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -104,9 +104,10 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, smp_rmb(); /* Get host physical address for gpa */ - pfn = kvmppc_gfn_to_pfn(vcpu, gfn, iswrite, &writable); + pfn = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable); if (is_error_noslot_pfn(pfn)) { - printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", gfn); + printk(KERN_INFO "Couldn't get guest page for gpa %lx!\n", + orig_pte->raddr); r = -EINVAL; goto out; } diff --git 
a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 6125f60..e40765f 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -511,19 +511,19 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) put_page(hpage); } -static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) +static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) { ulong mp_pa = vcpu->arch.magic_page_pa; if (!(kvmppc_get_msr(vcpu) & MSR_SF)) mp_pa = (uint32_t)mp_pa; - if (unlikely(mp_pa) && - unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) { + gpa &= ~0xFFFULL; + if (unlikely(mp_pa) && unlikely((mp_pa & KVM_PAM) == (gpa & KVM_PAM))) { return 1; } - return kvm_is_visible_gfn(vcpu->kvm, gfn); + return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT); } int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -614,7 +614,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); } else if (!is_mmio && - kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { + kvmppc_visible_gpa(vcpu, pte.raddr)) { if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) { /* * There is already a host HPTE there, presumably @@ -1387,8 +1387,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, p = __get_free_page(GFP_KERNEL|__GFP_ZERO); if (!p) goto uninit_vcpu; - /* the real shared page fills the last 4k of our page */ - vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096); + vcpu->arch.shared = (void *)p; #ifdef CONFIG_PPC_BOOK3S_64 /* Always start the shared struct in native endian mode */ #ifdef __BIG_ENDIAN__ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 7efc2b7..fe0257a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -190,6 +190,25 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) vcpu->arch.magic_page_pa = param1 & ~0xfffULL; vcpu->arch.magic_page_ea = param2 & ~0xfffULL; +#ifdef CONFIG_PPC_64K_PAGES + /* + * Make sure our 4k magic page is in the same window of a 64k + * page within the guest and within the host's page. + */ + if ((vcpu->arch.magic_page_pa & 0xf000) != + ((ulong)vcpu->arch.shared & 0xf000)) { + void *old_shared = vcpu->arch.shared; + ulong shared = (ulong)vcpu->arch.shared; + void *new_shared; + + shared &= PAGE_MASK; + shared |= vcpu->arch.magic_page_pa & 0xf000; + new_shared = (void*)shared; + memcpy(new_shared, old_shared, 0x1000); + vcpu->arch.shared = new_shared; + } +#endif + r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7; r = EV_SUCCESS; -- cgit v0.10.2 From 1dc0c5b88cae1c211b37fed9187379a692bb469b Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 17 Jul 2014 17:01:35 +0530 Subject: kvm: ppc: bookehv: Added wrapper macros for shadow registers There are shadow registers like, GSPRG[0-3], GSRR0, GSRR1 etc on BOOKE-HV and these shadow registers are guest accessible. So these shadow registers needs to be updated on BOOKE-HV. This patch adds new macro for get/set helper of shadow register . 
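A sketch of what one of the new wrappers expands to on BOOKE-HV, written out by hand for srr0 (the macro definitions themselves are in the hunk below):

	/* SHARED_SPRNG_WRAPPER(srr0, 64, SPRN_GSRR0) effectively yields: */
	static inline ulong kvmppc_get_srr0(struct kvm_vcpu *vcpu)
	{
		return mfspr(SPRN_GSRR0);	/* read the guest shadow SPR */
	}

	static inline void kvmppc_set_srr0(struct kvm_vcpu *vcpu, ulong val)
	{
		mtspr(SPRN_GSRR0, val);		/* write the guest shadow SPR */
	}

On non-HV BOOKE the same wrapper falls back to the vcpu->arch.shared fields, as before.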
Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index e2fd5a1..6520d09 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -472,8 +472,20 @@ static inline bool kvmppc_shared_big_endian(struct kvm_vcpu *vcpu) #endif } +#define SPRNG_WRAPPER_GET(reg, e500hv_spr) \ +static inline ulong kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ +{ \ + return mfspr(e500hv_spr); \ +} \ + +#define SPRNG_WRAPPER_SET(reg, e500hv_spr) \ +static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, ulong val) \ +{ \ + mtspr(e500hv_spr, val); \ +} \ + #define SHARED_WRAPPER_GET(reg, size) \ -static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ +static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ { \ if (kvmppc_shared_big_endian(vcpu)) \ return be##size##_to_cpu(vcpu->arch.shared->reg); \ @@ -494,14 +506,30 @@ static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val) \ SHARED_WRAPPER_GET(reg, size) \ SHARED_WRAPPER_SET(reg, size) \ +#define SPRNG_WRAPPER(reg, e500hv_spr) \ + SPRNG_WRAPPER_GET(reg, e500hv_spr) \ + SPRNG_WRAPPER_SET(reg, e500hv_spr) \ + +#ifdef CONFIG_KVM_BOOKE_HV + +#define SHARED_SPRNG_WRAPPER(reg, size, e500hv_spr) \ + SPRNG_WRAPPER(reg, e500hv_spr) \ + +#else + +#define SHARED_SPRNG_WRAPPER(reg, size, e500hv_spr) \ + SHARED_WRAPPER(reg, size) \ + +#endif + SHARED_WRAPPER(critical, 64) -SHARED_WRAPPER(sprg0, 64) -SHARED_WRAPPER(sprg1, 64) -SHARED_WRAPPER(sprg2, 64) -SHARED_WRAPPER(sprg3, 64) -SHARED_WRAPPER(srr0, 64) -SHARED_WRAPPER(srr1, 64) -SHARED_WRAPPER(dar, 64) +SHARED_SPRNG_WRAPPER(sprg0, 64, SPRN_GSPRG0) +SHARED_SPRNG_WRAPPER(sprg1, 64, SPRN_GSPRG1) +SHARED_SPRNG_WRAPPER(sprg2, 64, SPRN_GSPRG2) +SHARED_SPRNG_WRAPPER(sprg3, 64, SPRN_GSPRG3) +SHARED_SPRNG_WRAPPER(srr0, 64, SPRN_GSRR0) +SHARED_SPRNG_WRAPPER(srr1, 64, SPRN_GSRR1) +SHARED_SPRNG_WRAPPER(dar, 64, SPRN_GDEAR) SHARED_WRAPPER_GET(msr, 64) static inline void kvmppc_set_msr_fast(struct kvm_vcpu *vcpu, u64 val) { -- cgit v0.10.2 From 31579eea69c8088685a4dc82784ca839cfd5ae73 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 17 Jul 2014 17:01:36 +0530 Subject: kvm: ppc: booke: Use the shared struct helpers of SRR0 and SRR1 Use kvmppc_set_srr0/srr1() and kvmppc_get_srr0/srr1() helper functions Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index ab62109..3b43adb 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -266,13 +266,8 @@ static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu) static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) { -#ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_GSRR0, srr0); - mtspr(SPRN_GSRR1, srr1); -#else - vcpu->arch.shared->srr0 = srr0; - vcpu->arch.shared->srr1 = srr1; -#endif + kvmppc_set_srr0(vcpu, srr0); + kvmppc_set_srr1(vcpu, srr1); } static void set_guest_csrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) @@ -1265,8 +1260,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->lr = vcpu->arch.lr; regs->xer = kvmppc_get_xer(vcpu); regs->msr = vcpu->arch.shared->msr; - regs->srr0 = vcpu->arch.shared->srr0; - regs->srr1 = vcpu->arch.shared->srr1; + regs->srr0 = kvmppc_get_srr0(vcpu); + regs->srr1 = kvmppc_get_srr1(vcpu); regs->pid = vcpu->arch.pid; regs->sprg0 = vcpu->arch.shared->sprg0; regs->sprg1 = vcpu->arch.shared->sprg1; @@ -1293,8 +1288,8 @@ int 
kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu->arch.lr = regs->lr; kvmppc_set_xer(vcpu, regs->xer); kvmppc_set_msr(vcpu, regs->msr); - vcpu->arch.shared->srr0 = regs->srr0; - vcpu->arch.shared->srr1 = regs->srr1; + kvmppc_set_srr0(vcpu, regs->srr0); + kvmppc_set_srr1(vcpu, regs->srr1); kvmppc_set_pid(vcpu, regs->pid); vcpu->arch.shared->sprg0 = regs->sprg0; vcpu->arch.shared->sprg1 = regs->sprg1; -- cgit v0.10.2 From a5414d4b5ed8a2b10a1c8fe84f30ed4f19d0a35e Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 17 Jul 2014 17:01:37 +0530 Subject: kvm: ppc: booke: Use the shared struct helpers of SPRN_DEAR Uses kvmppc_set_dar() and kvmppc_get_dar() helper functions Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 3b43adb..8e8b14b 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -292,24 +292,6 @@ static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) vcpu->arch.mcsrr1 = srr1; } -static unsigned long get_guest_dear(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_KVM_BOOKE_HV - return mfspr(SPRN_GDEAR); -#else - return vcpu->arch.shared->dar; -#endif -} - -static void set_guest_dear(struct kvm_vcpu *vcpu, unsigned long dear) -{ -#ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_GDEAR, dear); -#else - vcpu->arch.shared->dar = dear; -#endif -} - static unsigned long get_guest_esr(struct kvm_vcpu *vcpu) { #ifdef CONFIG_KVM_BOOKE_HV @@ -447,7 +429,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, if (update_esr == true) set_guest_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) - set_guest_dear(vcpu, vcpu->arch.queued_dear); + kvmppc_set_dar(vcpu, vcpu->arch.queued_dear); if (update_epr == true) { if (vcpu->arch.epr_flags & KVMPPC_EPR_USER) kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); @@ -1317,7 +1299,7 @@ static void get_sregs_base(struct kvm_vcpu *vcpu, sregs->u.e.csrr1 = vcpu->arch.csrr1; sregs->u.e.mcsr = vcpu->arch.mcsr; sregs->u.e.esr = get_guest_esr(vcpu); - sregs->u.e.dear = get_guest_dear(vcpu); + sregs->u.e.dear = kvmppc_get_dar(vcpu); sregs->u.e.tsr = vcpu->arch.tsr; sregs->u.e.tcr = vcpu->arch.tcr; sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); @@ -1335,7 +1317,7 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, vcpu->arch.csrr1 = sregs->u.e.csrr1; vcpu->arch.mcsr = sregs->u.e.mcsr; set_guest_esr(vcpu, sregs->u.e.esr); - set_guest_dear(vcpu, sregs->u.e.dear); + kvmppc_set_dar(vcpu, sregs->u.e.dear); vcpu->arch.vrsave = sregs->u.e.vrsave; kvmppc_set_tcr(vcpu, sregs->u.e.tcr); -- cgit v0.10.2 From dc168549d9a0fb55d3021ee408adf25786cfda23 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 17 Jul 2014 17:01:38 +0530 Subject: kvm: ppc: booke: Add shared struct helpers of SPRN_ESR Add and use kvmppc_set_esr() and kvmppc_get_esr() helper functions Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 6520d09..c95bdbd 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -530,6 +530,7 @@ SHARED_SPRNG_WRAPPER(sprg3, 64, SPRN_GSPRG3) SHARED_SPRNG_WRAPPER(srr0, 64, SPRN_GSRR0) SHARED_SPRNG_WRAPPER(srr1, 64, SPRN_GSRR1) SHARED_SPRNG_WRAPPER(dar, 64, SPRN_GDEAR) +SHARED_SPRNG_WRAPPER(esr, 64, SPRN_GESR) SHARED_WRAPPER_GET(msr, 64) static inline void kvmppc_set_msr_fast(struct kvm_vcpu *vcpu, u64 val) { diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 8e8b14b..25a7e70 
100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -292,24 +292,6 @@ static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) vcpu->arch.mcsrr1 = srr1; } -static unsigned long get_guest_esr(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_KVM_BOOKE_HV - return mfspr(SPRN_GESR); -#else - return vcpu->arch.shared->esr; -#endif -} - -static void set_guest_esr(struct kvm_vcpu *vcpu, u32 esr) -{ -#ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_GESR, esr); -#else - vcpu->arch.shared->esr = esr; -#endif -} - static unsigned long get_guest_epr(struct kvm_vcpu *vcpu) { #ifdef CONFIG_KVM_BOOKE_HV @@ -427,7 +409,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; if (update_esr == true) - set_guest_esr(vcpu, vcpu->arch.queued_esr); + kvmppc_set_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) kvmppc_set_dar(vcpu, vcpu->arch.queued_dear); if (update_epr == true) { @@ -1298,7 +1280,7 @@ static void get_sregs_base(struct kvm_vcpu *vcpu, sregs->u.e.csrr0 = vcpu->arch.csrr0; sregs->u.e.csrr1 = vcpu->arch.csrr1; sregs->u.e.mcsr = vcpu->arch.mcsr; - sregs->u.e.esr = get_guest_esr(vcpu); + sregs->u.e.esr = kvmppc_get_esr(vcpu); sregs->u.e.dear = kvmppc_get_dar(vcpu); sregs->u.e.tsr = vcpu->arch.tsr; sregs->u.e.tcr = vcpu->arch.tcr; @@ -1316,7 +1298,7 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, vcpu->arch.csrr0 = sregs->u.e.csrr0; vcpu->arch.csrr1 = sregs->u.e.csrr1; vcpu->arch.mcsr = sregs->u.e.mcsr; - set_guest_esr(vcpu, sregs->u.e.esr); + kvmppc_set_esr(vcpu, sregs->u.e.esr); kvmppc_set_dar(vcpu, sregs->u.e.dear); vcpu->arch.vrsave = sregs->u.e.vrsave; kvmppc_set_tcr(vcpu, sregs->u.e.tcr); -- cgit v0.10.2 From c1b8a01bf959e87cff9c9dcaf51cb5c1d60a2c52 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 17 Jul 2014 17:01:39 +0530 Subject: kvm: ppc: booke: Use the shared struct helpers for SPRN_SPRG0-7 Use kvmppc_set_sprg[0-7]() and kvmppc_get_sprg[0-7]() helper functions Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 25a7e70..34562d4 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1227,14 +1227,14 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->srr0 = kvmppc_get_srr0(vcpu); regs->srr1 = kvmppc_get_srr1(vcpu); regs->pid = vcpu->arch.pid; - regs->sprg0 = vcpu->arch.shared->sprg0; - regs->sprg1 = vcpu->arch.shared->sprg1; - regs->sprg2 = vcpu->arch.shared->sprg2; - regs->sprg3 = vcpu->arch.shared->sprg3; - regs->sprg4 = vcpu->arch.shared->sprg4; - regs->sprg5 = vcpu->arch.shared->sprg5; - regs->sprg6 = vcpu->arch.shared->sprg6; - regs->sprg7 = vcpu->arch.shared->sprg7; + regs->sprg0 = kvmppc_get_sprg0(vcpu); + regs->sprg1 = kvmppc_get_sprg1(vcpu); + regs->sprg2 = kvmppc_get_sprg2(vcpu); + regs->sprg3 = kvmppc_get_sprg3(vcpu); + regs->sprg4 = kvmppc_get_sprg4(vcpu); + regs->sprg5 = kvmppc_get_sprg5(vcpu); + regs->sprg6 = kvmppc_get_sprg6(vcpu); + regs->sprg7 = kvmppc_get_sprg7(vcpu); for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) regs->gpr[i] = kvmppc_get_gpr(vcpu, i); @@ -1255,14 +1255,14 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) kvmppc_set_srr0(vcpu, regs->srr0); kvmppc_set_srr1(vcpu, regs->srr1); kvmppc_set_pid(vcpu, regs->pid); - vcpu->arch.shared->sprg0 = regs->sprg0; - vcpu->arch.shared->sprg1 = regs->sprg1; - vcpu->arch.shared->sprg2 = regs->sprg2; - vcpu->arch.shared->sprg3 = regs->sprg3; - 
vcpu->arch.shared->sprg4 = regs->sprg4; - vcpu->arch.shared->sprg5 = regs->sprg5; - vcpu->arch.shared->sprg6 = regs->sprg6; - vcpu->arch.shared->sprg7 = regs->sprg7; + kvmppc_set_sprg0(vcpu, regs->sprg0); + kvmppc_set_sprg1(vcpu, regs->sprg1); + kvmppc_set_sprg2(vcpu, regs->sprg2); + kvmppc_set_sprg3(vcpu, regs->sprg3); + kvmppc_set_sprg4(vcpu, regs->sprg4); + kvmppc_set_sprg5(vcpu, regs->sprg5); + kvmppc_set_sprg6(vcpu, regs->sprg6); + kvmppc_set_sprg7(vcpu, regs->sprg7); for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) kvmppc_set_gpr(vcpu, i, regs->gpr[i]); diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 27a4b28..28c1588 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -165,16 +165,16 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) * guest (PR-mode only). */ case SPRN_SPRG4: - vcpu->arch.shared->sprg4 = spr_val; + kvmppc_set_sprg4(vcpu, spr_val); break; case SPRN_SPRG5: - vcpu->arch.shared->sprg5 = spr_val; + kvmppc_set_sprg5(vcpu, spr_val); break; case SPRN_SPRG6: - vcpu->arch.shared->sprg6 = spr_val; + kvmppc_set_sprg6(vcpu, spr_val); break; case SPRN_SPRG7: - vcpu->arch.shared->sprg7 = spr_val; + kvmppc_set_sprg7(vcpu, spr_val); break; case SPRN_IVPR: -- cgit v0.10.2 From 34f754b99e2f642c661967b456764b2c7ccc096e Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 17 Jul 2014 17:01:40 +0530 Subject: kvm: ppc: Add SPRN_EPR get helper function kvmppc_set_epr() is already defined in asm/kvm_ppc.h, So rename and move get_epr helper function to same file. Signed-off-by: Bharat Bhushan [agraf: remove duplicate return] Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c95bdbd..246fb9a7 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -392,6 +392,17 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) { return 0; } #endif +static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_KVM_BOOKE_HV + return mfspr(SPRN_GEPR); +#elif defined(CONFIG_BOOKE) + return vcpu->arch.epr; +#else + return 0; +#endif +} + static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) { #ifdef CONFIG_KVM_BOOKE_HV diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 34562d4..a06ef6b 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -292,15 +292,6 @@ static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) vcpu->arch.mcsrr1 = srr1; } -static unsigned long get_guest_epr(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_KVM_BOOKE_HV - return mfspr(SPRN_GEPR); -#else - return vcpu->arch.epr; -#endif -} - /* Deliver the interrupt of the corresponding priority, if possible. */ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) @@ -1452,7 +1443,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2); break; case KVM_REG_PPC_EPR: { - u32 epr = get_guest_epr(vcpu); + u32 epr = kvmppc_get_epr(vcpu); val = get_reg_val(reg->id, epr); break; } -- cgit v0.10.2 From b5741bb3d4dc751a76f6caaaec8e2b271ff8ca48 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 23 Jul 2014 19:06:18 +0300 Subject: KVM: PPC: e500mc: Revert "add load inst fixup" The commit 1d628af7 "add load inst fixup" made an attempt to handle failures generated by reading the guest current instruction. 
The fixup code that was added works by chance hiding the real issue. Load external pid (lwepx) instruction, used by KVM to read guest instructions, is executed in a subsituted guest translation context (EPLC[EGS] = 1). In consequence lwepx's TLB error and data storage interrupts need to be handled by KVM, even though these interrupts are generated from host context (MSR[GS] = 0) where lwepx is executed. Currently, KVM hooks only interrupts generated from guest context (MSR[GS] = 1), doing minimal checks on the fast path to avoid host performance degradation. As a result, the host kernel handles lwepx faults searching the faulting guest data address (loaded in DEAR) in its own Logical Partition ID (LPID) 0 context. In case a host translation is found the execution returns to the lwepx instruction instead of the fixup, the host ending up in an infinite loop. Revert the commit "add load inst fixup". lwepx issue will be addressed in a subsequent patch without needing fixup code. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index a1712b8..6ff4480 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -29,7 +29,6 @@ #include #include #include -#include #ifdef CONFIG_64BIT #include @@ -164,32 +163,9 @@ PPC_STL r30, VCPU_GPR(R30)(r4) PPC_STL r31, VCPU_GPR(R31)(r4) mtspr SPRN_EPLC, r8 - - /* disable preemption, so we are sure we hit the fixup handler */ - CURRENT_THREAD_INFO(r8, r1) - li r7, 1 - stw r7, TI_PREEMPT(r8) - isync - - /* - * In case the read goes wrong, we catch it and write an invalid value - * in LAST_INST instead. - */ -1: lwepx r9, 0, r5 -2: -.section .fixup, "ax" -3: li r9, KVM_INST_FETCH_FAILED - b 2b -.previous -.section __ex_table,"a" - PPC_LONG_ALIGN - PPC_LONG 1b,3b -.previous - + lwepx r9, 0, r5 mtspr SPRN_EPLC, r3 - li r7, 0 - stw r7, TI_PREEMPT(r8) stw r9, VCPU_LAST_INST(r4) .endif -- cgit v0.10.2 From 9c0d4e0dcf69b1ab3a9f8debebd119f53964cb57 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 23 Jul 2014 19:06:19 +0300 Subject: KVM: PPC: Book3e: Add TLBSEL/TSIZE defines for MAS0/1 Add mising defines MAS0_GET_TLBSEL() and MAS1_GET_TSIZE() for Book3E. 
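A quick usage sketch of the new accessors (the values are illustrative only):

	u32 mas0 = MAS0_TLBSEL(1);
	u32 mas1 = MAS1_TSIZE(5);

	unsigned int tlbsel = MAS0_GET_TLBSEL(mas0);	/* 1 */
	unsigned int tsize  = MAS1_GET_TSIZE(mas1);	/* 5 */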
Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index 8d24f78..cd4f04a 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -40,9 +40,11 @@ /* MAS registers bit definitions */ -#define MAS0_TLBSEL_MASK 0x30000000 -#define MAS0_TLBSEL_SHIFT 28 -#define MAS0_TLBSEL(x) (((x) << MAS0_TLBSEL_SHIFT) & MAS0_TLBSEL_MASK) +#define MAS0_TLBSEL_MASK 0x30000000 +#define MAS0_TLBSEL_SHIFT 28 +#define MAS0_TLBSEL(x) (((x) << MAS0_TLBSEL_SHIFT) & MAS0_TLBSEL_MASK) +#define MAS0_GET_TLBSEL(mas0) (((mas0) & MAS0_TLBSEL_MASK) >> \ + MAS0_TLBSEL_SHIFT) #define MAS0_ESEL_MASK 0x0FFF0000 #define MAS0_ESEL_SHIFT 16 #define MAS0_ESEL(x) (((x) << MAS0_ESEL_SHIFT) & MAS0_ESEL_MASK) @@ -60,6 +62,7 @@ #define MAS1_TSIZE_MASK 0x00000f80 #define MAS1_TSIZE_SHIFT 7 #define MAS1_TSIZE(x) (((x) << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK) +#define MAS1_GET_TSIZE(mas1) (((mas1) & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT) #define MAS2_EPN (~0xFFFUL) #define MAS2_X0 0x00000040 -- cgit v0.10.2 From 9a26af64d6bba72c9dfd62cc0cab0e79f8a66d7b Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 23 Jul 2014 19:06:20 +0300 Subject: KVM: PPC: Book3s: Remove kvmppc_read_inst() function In the context of replacing kvmppc_ld() function calls with a version of kvmppc_get_last_inst() which allow to fail, Alex Graf suggested this: "If we get EMULATE_AGAIN, we just have to make sure we go back into the guest. No need to inject an ISI into the guest - it'll do that all by itself. With an error returning kvmppc_get_last_inst we can just use completely get rid of kvmppc_read_inst() and only use kvmppc_get_last_inst() instead." As a intermediate step get rid of kvmppc_read_inst() and only use kvmppc_ld() instead. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e40765f..e76aec3 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -710,42 +710,6 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac) #endif } -static int kvmppc_read_inst(struct kvm_vcpu *vcpu) -{ - ulong srr0 = kvmppc_get_pc(vcpu); - u32 last_inst = kvmppc_get_last_inst(vcpu); - int ret; - - ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); - if (ret == -ENOENT) { - ulong msr = kvmppc_get_msr(vcpu); - - msr = kvmppc_set_field(msr, 33, 33, 1); - msr = kvmppc_set_field(msr, 34, 36, 0); - msr = kvmppc_set_field(msr, 42, 47, 0); - kvmppc_set_msr_fast(vcpu, msr); - kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); - return EMULATE_AGAIN; - } - - return EMULATE_DONE; -} - -static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr) -{ - - /* Need to do paired single emulation? 
*/ - if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) - return EMULATE_DONE; - - /* Read out the instruction */ - if (kvmppc_read_inst(vcpu) == EMULATE_DONE) - /* Need to emulate */ - return EMULATE_FAIL; - - return EMULATE_AGAIN; -} - /* Handle external providers (FPU, Altivec, VSX) */ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, ulong msr) @@ -1149,31 +1113,49 @@ program_interrupt: case BOOK3S_INTERRUPT_VSX: { int ext_msr = 0; + int emul; + ulong pc; + u32 last_inst; + + if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) { + /* Do paired single instruction emulation */ + pc = kvmppc_get_pc(vcpu); + last_inst = kvmppc_get_last_inst(vcpu); + emul = kvmppc_ld(vcpu, &pc, sizeof(u32), &last_inst, + false); + if (emul == EMULATE_DONE) + goto program_interrupt; + else + r = RESUME_GUEST; - switch (exit_nr) { - case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break; - case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break; - case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break; + break; } - switch (kvmppc_check_ext(vcpu, exit_nr)) { - case EMULATE_DONE: - /* everything ok - let's enable the ext */ - r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); + /* Enable external provider */ + switch (exit_nr) { + case BOOK3S_INTERRUPT_FP_UNAVAIL: + ext_msr = MSR_FP; break; - case EMULATE_FAIL: - /* we need to emulate this instruction */ - goto program_interrupt; + + case BOOK3S_INTERRUPT_ALTIVEC: + ext_msr = MSR_VEC; break; - default: - /* nothing to worry about - go again */ + + case BOOK3S_INTERRUPT_VSX: + ext_msr = MSR_VSX; break; } + + r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); break; } case BOOK3S_INTERRUPT_ALIGNMENT: - if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { - u32 last_inst = kvmppc_get_last_inst(vcpu); + { + ulong pc = kvmppc_get_pc(vcpu); + u32 last_inst = kvmppc_get_last_inst(vcpu); + int emul = kvmppc_ld(vcpu, &pc, sizeof(u32), &last_inst, false); + + if (emul == EMULATE_DONE) { u32 dsisr; u64 dar; @@ -1187,6 +1169,7 @@ program_interrupt: } r = RESUME_GUEST; break; + } #ifdef CONFIG_PPC_BOOK3S_64 case BOOK3S_INTERRUPT_FAC_UNAVAIL: kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56); -- cgit v0.10.2 From 51f047261e717b74b226f837a16455994b61ae30 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 23 Jul 2014 19:06:21 +0300 Subject: KVM: PPC: Allow kvmppc_get_last_inst() to fail On book3e, guest last instruction is read on the exit path using load external pid (lwepx) dedicated instruction. This load operation may fail due to TLB eviction and execute-but-not-read entries. This patch lay down the path for an alternative solution to read the guest last instruction, by allowing kvmppc_get_lat_inst() function to fail. Architecture specific implmentations of kvmppc_load_last_inst() may read last guest instruction and instruct the emulation layer to re-execute the guest in case of failure. Make kvmppc_get_last_inst() definition common between architectures. 
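The caller-side pattern this enables looks roughly like the following (a sketch; INST_GENERIC, EMULATE_DONE and the EMULATE_AGAIN handling come from this patch and its follow-ups below):

	u32 inst;

	if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE) {
		/* Could not (re)read the guest instruction: go back into the
		 * guest and let it re-execute, which re-triggers the exit. */
		return RESUME_GUEST;
	}
	/* ... emulate 'inst' as before ... */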
Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 20fb6f2..a86ca65 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -276,32 +276,6 @@ static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu) return (kvmppc_get_msr(vcpu) & MSR_LE) != (MSR_KERNEL & MSR_LE); } -static inline u32 kvmppc_get_last_inst_internal(struct kvm_vcpu *vcpu, ulong pc) -{ - /* Load the instruction manually if it failed to do so in the - * exit path */ - if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) - kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false); - - return kvmppc_need_byteswap(vcpu) ? swab32(vcpu->arch.last_inst) : - vcpu->arch.last_inst; -} - -static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) -{ - return kvmppc_get_last_inst_internal(vcpu, kvmppc_get_pc(vcpu)); -} - -/* - * Like kvmppc_get_last_inst(), but for fetching a sc instruction. - * Because the sc instruction sets SRR0 to point to the following - * instruction, we have to fetch from pc - 4. - */ -static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu) -{ - return kvmppc_get_last_inst_internal(vcpu, kvmppc_get_pc(vcpu) - 4); -} - static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) { return vcpu->arch.fault_dar; diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index c7aed61..cbb1990 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -69,11 +69,6 @@ static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu) return false; } -static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.last_inst; -} - static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) { vcpu->arch.ctr = val; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 246fb9a7..e381363 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -47,6 +47,11 @@ enum emulation_result { EMULATE_EXIT_USER, /* emulation requires exit to user-space */ }; +enum instruction_type { + INST_GENERIC, + INST_SC, /* system call */ +}; + extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern void kvmppc_handler_highmem(void); @@ -62,6 +67,9 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, u64 val, unsigned int bytes, int is_default_endian); +extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, + enum instruction_type type, u32 *inst); + extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); @@ -234,6 +242,29 @@ struct kvmppc_ops { extern struct kvmppc_ops *kvmppc_hv_ops; extern struct kvmppc_ops *kvmppc_pr_ops; +static inline int kvmppc_get_last_inst(struct kvm_vcpu *vcpu, + enum instruction_type type, u32 *inst) +{ + int ret = EMULATE_DONE; + u32 fetched_inst; + + /* Load the instruction manually if it failed to do so in the + * exit path */ + if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) + ret = kvmppc_load_last_inst(vcpu, type, &vcpu->arch.last_inst); + + /* Write fetch_failed unswapped if the fetch failed */ + if (ret == EMULATE_DONE) + fetched_inst = kvmppc_need_byteswap(vcpu) ? 
+ swab32(vcpu->arch.last_inst) : + vcpu->arch.last_inst; + else + fetched_inst = vcpu->arch.last_inst; + + *inst = fetched_inst; + return ret; +} + static inline bool is_kvmppc_hv_enabled(struct kvm *kvm) { return kvm->arch.kvm_ops == kvmppc_hv_ops; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 31facfc..37ca8a0 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -488,6 +488,23 @@ mmio: } EXPORT_SYMBOL_GPL(kvmppc_ld); +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, + u32 *inst) +{ + ulong pc = kvmppc_get_pc(vcpu); + int r; + + if (type == INST_SC) + pc -= 4; + + r = kvmppc_ld(vcpu, &pc, sizeof(u32), inst, false); + if (r == EMULATE_DONE) + return r; + else + return EMULATE_AGAIN; +} +EXPORT_SYMBOL_GPL(kvmppc_load_last_inst); + int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { return 0; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 2d154d9..fa944a3 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -530,21 +530,14 @@ static int instruction_is_store(unsigned int instr) static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long gpa, gva_t ea, int is_store) { - int ret; u32 last_inst; - unsigned long srr0 = kvmppc_get_pc(vcpu); - /* We try to load the last instruction. We don't let - * emulate_instruction do it as it doesn't check what - * kvmppc_ld returns. + /* * If we fail, we just return to the guest and try executing it again. */ - if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) { - ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); - if (ret != EMULATE_DONE || last_inst == KVM_INST_FETCH_FAILED) - return RESUME_GUEST; - vcpu->arch.last_inst = last_inst; - } + if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) != + EMULATE_DONE) + return RESUME_GUEST; /* * WARNING: We do not know for sure whether the instruction we just @@ -558,7 +551,7 @@ static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, * we just return and retry the instruction. */ - if (instruction_is_store(kvmppc_get_last_inst(vcpu)) != !!is_store) + if (instruction_is_store(last_inst) != !!is_store) return RESUME_GUEST; /* diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index 6c8011f..bfb8035 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -639,26 +639,36 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { - u32 inst = kvmppc_get_last_inst(vcpu); + u32 inst; enum emulation_result emulated = EMULATE_DONE; + int ax_rd, ax_ra, ax_rb, ax_rc; + short full_d; + u64 *fpr_d, *fpr_a, *fpr_b, *fpr_c; - int ax_rd = inst_get_field(inst, 6, 10); - int ax_ra = inst_get_field(inst, 11, 15); - int ax_rb = inst_get_field(inst, 16, 20); - int ax_rc = inst_get_field(inst, 21, 25); - short full_d = inst_get_field(inst, 16, 31); - - u64 *fpr_d = &VCPU_FPR(vcpu, ax_rd); - u64 *fpr_a = &VCPU_FPR(vcpu, ax_ra); - u64 *fpr_b = &VCPU_FPR(vcpu, ax_rb); - u64 *fpr_c = &VCPU_FPR(vcpu, ax_rc); - - bool rcomp = (inst & 1) ? 
true : false; - u32 cr = kvmppc_get_cr(vcpu); + bool rcomp; + u32 cr; #ifdef DEBUG int i; #endif + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst); + if (emulated != EMULATE_DONE) + return emulated; + + ax_rd = inst_get_field(inst, 6, 10); + ax_ra = inst_get_field(inst, 11, 15); + ax_rb = inst_get_field(inst, 16, 20); + ax_rc = inst_get_field(inst, 21, 25); + full_d = inst_get_field(inst, 16, 31); + + fpr_d = &VCPU_FPR(vcpu, ax_rd); + fpr_a = &VCPU_FPR(vcpu, ax_ra); + fpr_b = &VCPU_FPR(vcpu, ax_rb); + fpr_c = &VCPU_FPR(vcpu, ax_rc); + + rcomp = (inst & 1) ? true : false; + cr = kvmppc_get_cr(vcpu); + if (!kvmppc_inst_is_paired_single(vcpu, inst)) return EMULATE_FAIL; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e76aec3..b18f2d4 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1018,15 +1018,24 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, { enum emulation_result er; ulong flags; + u32 last_inst; + int emul; program_interrupt: flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; + emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); + if (emul != EMULATE_DONE) { + r = RESUME_GUEST; + break; + } + if (kvmppc_get_msr(vcpu) & MSR_PR) { #ifdef EXIT_DEBUG - printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); + pr_info("Userspace triggered 0x700 exception at\n 0x%lx (0x%x)\n", + kvmppc_get_pc(vcpu), last_inst); #endif - if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) != + if ((last_inst & 0xff0007ff) != (INS_DCBZ & 0xfffffff7)) { kvmppc_core_queue_program(vcpu, flags); r = RESUME_GUEST; @@ -1045,7 +1054,7 @@ program_interrupt: break; case EMULATE_FAIL: printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", - __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); + __func__, kvmppc_get_pc(vcpu), last_inst); kvmppc_core_queue_program(vcpu, flags); r = RESUME_GUEST; break; @@ -1062,8 +1071,23 @@ program_interrupt: break; } case BOOK3S_INTERRUPT_SYSCALL: + { + u32 last_sc; + int emul; + + /* Get last sc for papr */ + if (vcpu->arch.papr_enabled) { + /* The sc instuction points SRR0 to the next inst */ + emul = kvmppc_get_last_inst(vcpu, INST_SC, &last_sc); + if (emul != EMULATE_DONE) { + kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) - 4); + r = RESUME_GUEST; + break; + } + } + if (vcpu->arch.papr_enabled && - (kvmppc_get_last_sc(vcpu) == 0x44000022) && + (last_sc == 0x44000022) && !(kvmppc_get_msr(vcpu) & MSR_PR)) { /* SC 1 papr hypercalls */ ulong cmd = kvmppc_get_gpr(vcpu, 3); @@ -1108,21 +1132,19 @@ program_interrupt: r = RESUME_GUEST; } break; + } case BOOK3S_INTERRUPT_FP_UNAVAIL: case BOOK3S_INTERRUPT_ALTIVEC: case BOOK3S_INTERRUPT_VSX: { int ext_msr = 0; int emul; - ulong pc; u32 last_inst; if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) { /* Do paired single instruction emulation */ - pc = kvmppc_get_pc(vcpu); - last_inst = kvmppc_get_last_inst(vcpu); - emul = kvmppc_ld(vcpu, &pc, sizeof(u32), &last_inst, - false); + emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, + &last_inst); if (emul == EMULATE_DONE) goto program_interrupt; else @@ -1151,9 +1173,8 @@ program_interrupt: } case BOOK3S_INTERRUPT_ALIGNMENT: { - ulong pc = kvmppc_get_pc(vcpu); - u32 last_inst = kvmppc_get_last_inst(vcpu); - int emul = kvmppc_ld(vcpu, &pc, sizeof(u32), &last_inst, false); + u32 last_inst; + int emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); if (emul == EMULATE_DONE) { u32 dsisr; diff --git a/arch/powerpc/kvm/booke.c 
b/arch/powerpc/kvm/booke.c index a06ef6b..50df5e3 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -702,6 +702,9 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) * they were actually modified by emulation. */ return RESUME_GUEST_NV; + case EMULATE_AGAIN: + return RESUME_GUEST; + case EMULATE_DO_DCR: run->exit_reason = KVM_EXIT_DCR; return RESUME_HOST; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 79677d7..4385c14 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -610,6 +610,12 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, } } +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, + u32 *instr) +{ + return EMULATE_AGAIN; +} + /************* MMU Notifiers *************/ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index da86d9b..c5c64b6 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -224,19 +224,25 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) * from opcode tables in the future. */ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) { - u32 inst = kvmppc_get_last_inst(vcpu); - int ra = get_ra(inst); - int rs = get_rs(inst); - int rt = get_rt(inst); - int sprn = get_sprn(inst); - enum emulation_result emulated = EMULATE_DONE; + u32 inst; + int ra, rs, rt, sprn; + enum emulation_result emulated; int advance = 1; /* this default type might be overwritten by subcategories */ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); + emulated = kvmppc_get_last_inst(vcpu, false, &inst); + if (emulated != EMULATE_DONE) + return emulated; + pr_debug("Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); + ra = get_ra(inst); + rs = get_rs(inst); + rt = get_rt(inst); + sprn = get_sprn(inst); + switch (get_op(inst)) { case OP_TRAP: #ifdef CONFIG_PPC_BOOK3S diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index fe0257a..cfa6cfa 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -280,6 +280,9 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) * actually modified. */ r = RESUME_GUEST_NV; break; + case EMULATE_AGAIN: + r = RESUME_GUEST; + break; case EMULATE_DO_MMIO: run->exit_reason = KVM_EXIT_MMIO; /* We must reload nonvolatiles because "update" load/store @@ -289,11 +292,15 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) r = RESUME_HOST_NV; break; case EMULATE_FAIL: + { + u32 last_inst; + + kvmppc_get_last_inst(vcpu, false, &last_inst); /* XXX Deliver Program interrupt to guest. */ - printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__, - kvmppc_get_last_inst(vcpu)); + pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst); r = RESUME_HOST; break; + } default: WARN_ON(1); r = RESUME_GUEST; -- cgit v0.10.2 From f5250471b2d6ad27d536cb34ce39d76b91b2b36b Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 23 Jul 2014 19:06:22 +0300 Subject: KVM: PPC: Bookehv: Get vcpu's last instruction for emulation On book3e, KVM uses load external pid (lwepx) dedicated instruction to read guest last instruction on the exit path. lwepx exceptions (DTLB_MISS, DSI and LRAT), generated by loading a guest address, needs to be handled by KVM. These exceptions are generated in a substituted guest translation context (EPLC[EGS] = 1) from host context (MSR[GS] = 0). 
Currently, KVM hooks only interrupts generated from guest context (MSR[GS] = 1), doing minimal checks on the fast path to avoid host performance degradation. lwepx exceptions originate from host state (MSR[GS] = 0), which implies additional checks in the DO_KVM macro (besides the current MSR[GS] = 1 check) by looking at the Exception Syndrome Register (ESR[EPID]) and the External PID Load Context Register (EPLC[EGS]). Doing this on each Data TLB miss exception is obviously too intrusive for the host. Instead, read the guest's last instruction in kvmppc_load_last_inst() by searching for its physical address and kmapping it. This addresses the TODO for TLB eviction and execute-but-not-read entries, and allows us to get rid of lwepx until we are able to handle failures. A simple stress benchmark shows a 1% sys performance degradation compared with the previous approach (lwepx without failure handling): time for i in `seq 1 10000`; do /bin/echo > /dev/null; done real 0m 8.85s user 0m 4.34s sys 0m 4.48s vs real 0m 8.84s user 0m 4.36s sys 0m 4.44s A solution that keeps lwepx and handles its exceptions in KVM would be to temporarily hijack the interrupt vector from the host. This imposes additional synchronization for cores like the FSL e6500 that share host IVOR registers between hardware threads. This optimized solution can later be developed on top of this patch. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 50df5e3..97bcde2 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -819,6 +819,28 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu, } } +static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu, + enum emulation_result emulated, u32 last_inst) +{ + switch (emulated) { + case EMULATE_AGAIN: + return RESUME_GUEST; + + case EMULATE_FAIL: + pr_debug("%s: load instruction from guest address %lx failed\n", + __func__, vcpu->arch.pc); + /* For debugging, encode the failing instruction and + * report it to userspace. 
*/ + run->hw.hardware_exit_reason = ~0ULL << 32; + run->hw.hardware_exit_reason |= last_inst; + kvmppc_core_queue_program(vcpu, ESR_PIL); + return RESUME_HOST; + + default: + BUG(); + } +} + /** * kvmppc_handle_exit * @@ -830,6 +852,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, int r = RESUME_HOST; int s; int idx; + u32 last_inst = KVM_INST_FETCH_FAILED; + enum emulation_result emulated = EMULATE_DONE; /* update before a new last_exit_type is rewritten */ kvmppc_update_timing_stats(vcpu); @@ -837,6 +861,20 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* restart interrupts if they were meant for the host */ kvmppc_restart_interrupt(vcpu, exit_nr); + /* + * get last instruction before beeing preempted + * TODO: for e6500 check also BOOKE_INTERRUPT_LRAT_ERROR & ESR_DATA + */ + switch (exit_nr) { + case BOOKE_INTERRUPT_DATA_STORAGE: + case BOOKE_INTERRUPT_DTLB_MISS: + case BOOKE_INTERRUPT_HV_PRIV: + emulated = kvmppc_get_last_inst(vcpu, false, &last_inst); + break; + default: + break; + } + local_irq_enable(); trace_kvm_exit(exit_nr, vcpu); @@ -845,6 +883,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; + if (emulated != EMULATE_DONE) { + r = kvmppc_resume_inst_load(run, vcpu, emulated, last_inst); + goto out; + } + switch (exit_nr) { case BOOKE_INTERRUPT_MACHINE_CHECK: printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR)); @@ -1134,6 +1177,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, BUG(); } +out: /* * To avoid clobbering exit_reason, only check for signals if we * aren't already exiting to userspace for some other reason. diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index 6ff4480..e000b39 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -121,38 +121,14 @@ 1: .if \flags & NEED_EMU - /* - * This assumes you have external PID support. - * To support a bookehv CPU without external PID, you'll - * need to look up the TLB entry and create a temporary mapping. - * - * FIXME: we don't currently handle if the lwepx faults. PR-mode - * booke doesn't handle it either. Since Linux doesn't use - * broadcast tlbivax anymore, the only way this should happen is - * if the guest maps its memory execute-but-not-read, or if we - * somehow take a TLB miss in the middle of this entry code and - * evict the relevant entry. On e500mc, all kernel lowmem is - * bolted into TLB1 large page mappings, and we don't use - * broadcast invalidates, so we should not take a TLB miss here. - * - * Later we'll need to deal with faults here. Disallowing guest - * mappings that are execute-but-not-read could be an option on - * e500mc, but not on chips with an LRAT if it is used. 
- */ - - mfspr r3, SPRN_EPLC /* will already have correct ELPID and EGS */ PPC_STL r15, VCPU_GPR(R15)(r4) PPC_STL r16, VCPU_GPR(R16)(r4) PPC_STL r17, VCPU_GPR(R17)(r4) PPC_STL r18, VCPU_GPR(R18)(r4) PPC_STL r19, VCPU_GPR(R19)(r4) - mr r8, r3 PPC_STL r20, VCPU_GPR(R20)(r4) - rlwimi r8, r6, EPC_EAS_SHIFT - MSR_IR_LG, EPC_EAS PPC_STL r21, VCPU_GPR(R21)(r4) - rlwimi r8, r6, EPC_EPR_SHIFT - MSR_PR_LG, EPC_EPR PPC_STL r22, VCPU_GPR(R22)(r4) - rlwimi r8, r10, EPC_EPID_SHIFT, EPC_EPID PPC_STL r23, VCPU_GPR(R23)(r4) PPC_STL r24, VCPU_GPR(R24)(r4) PPC_STL r25, VCPU_GPR(R25)(r4) @@ -162,10 +138,15 @@ PPC_STL r29, VCPU_GPR(R29)(r4) PPC_STL r30, VCPU_GPR(R30)(r4) PPC_STL r31, VCPU_GPR(R31)(r4) - mtspr SPRN_EPLC, r8 - isync - lwepx r9, 0, r5 - mtspr SPRN_EPLC, r3 + + /* + * We don't use external PID support. lwepx faults would need to be + * handled by KVM and this implies aditional code in DO_KVM (for + * DTB_MISS, DSI and LRAT) to check ESR[EPID] and EPLC[EGS] which + * is too intrusive for the host. Get last instuction in + * kvmppc_get_last_inst(). + */ + li r9, KVM_INST_FETCH_FAILED stw r9, VCPU_LAST_INST(r4) .endif diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 4385c14..4150826 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -610,11 +610,103 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, } } +#ifdef CONFIG_KVM_BOOKE_HV +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, + u32 *instr) +{ + gva_t geaddr; + hpa_t addr; + hfn_t pfn; + hva_t eaddr; + u32 mas1, mas2, mas3; + u64 mas7_mas3; + struct page *page; + unsigned int addr_space, psize_shift; + bool pr; + unsigned long flags; + + /* Search TLB for guest pc to get the real address */ + geaddr = kvmppc_get_pc(vcpu); + + addr_space = (vcpu->arch.shared->msr & MSR_IS) >> MSR_IR_LG; + + local_irq_save(flags); + mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | addr_space); + mtspr(SPRN_MAS5, MAS5_SGS | vcpu->kvm->arch.lpid); + asm volatile("tlbsx 0, %[geaddr]\n" : : + [geaddr] "r" (geaddr)); + mtspr(SPRN_MAS5, 0); + mtspr(SPRN_MAS8, 0); + mas1 = mfspr(SPRN_MAS1); + mas2 = mfspr(SPRN_MAS2); + mas3 = mfspr(SPRN_MAS3); +#ifdef CONFIG_64BIT + mas7_mas3 = mfspr(SPRN_MAS7_MAS3); +#else + mas7_mas3 = ((u64)mfspr(SPRN_MAS7) << 32) | mas3; +#endif + local_irq_restore(flags); + + /* + * If the TLB entry for guest pc was evicted, return to the guest. + * There are high chances to find a valid TLB entry next time. + */ + if (!(mas1 & MAS1_VALID)) + return EMULATE_AGAIN; + + /* + * Another thread may rewrite the TLB entry in parallel, don't + * execute from the address if the execute permission is not set + */ + pr = vcpu->arch.shared->msr & MSR_PR; + if (unlikely((pr && !(mas3 & MAS3_UX)) || + (!pr && !(mas3 & MAS3_SX)))) { + pr_err_ratelimited( + "%s: Instuction emulation from guest addres %08lx without execute permission\n", + __func__, geaddr); + return EMULATE_AGAIN; + } + + /* + * The real address will be mapped by a cacheable, memory coherent, + * write-back page. Check for mismatches when LRAT is used. 
+ */ + if (has_feature(vcpu, VCPU_FTR_MMU_V2) && + unlikely((mas2 & MAS2_I) || (mas2 & MAS2_W) || !(mas2 & MAS2_M))) { + pr_err_ratelimited( + "%s: Instuction emulation from guest addres %08lx mismatches storage attributes\n", + __func__, geaddr); + return EMULATE_AGAIN; + } + + /* Get pfn */ + psize_shift = MAS1_GET_TSIZE(mas1) + 10; + addr = (mas7_mas3 & (~0ULL << psize_shift)) | + (geaddr & ((1ULL << psize_shift) - 1ULL)); + pfn = addr >> PAGE_SHIFT; + + /* Guard against emulation from devices area */ + if (unlikely(!page_is_ram(pfn))) { + pr_err_ratelimited("%s: Instruction emulation from non-RAM host addres %08llx is not supported\n", + __func__, addr); + return EMULATE_AGAIN; + } + + /* Map a page and get guest's instruction */ + page = pfn_to_page(pfn); + eaddr = (unsigned long)kmap_atomic(page); + *instr = *(u32 *)(eaddr | (unsigned long)(addr & ~PAGE_MASK)); + kunmap_atomic((u32 *)eaddr); + + return EMULATE_DONE; +} +#else int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, u32 *instr) { return EMULATE_AGAIN; } +#endif /************* MMU Notifiers *************/ -- cgit v0.10.2 From 99e99d19a86dc596703ed79dcecf9ca6b32a6a8a Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Mon, 21 Jul 2014 11:23:26 +0530 Subject: kvm: ppc: bookehv: Save restore SPRN_SPRG9 on guest entry exit SPRN_SPRG is used by debug interrupt handler, so this is required for debug support. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 855ba4d..562f685 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -587,6 +587,7 @@ struct kvm_vcpu_arch { u32 mmucfg; u32 eptcfg; u32 epr; + u64 sprg9; u32 pwrmgtcr0; u32 crit_save; /* guest debug registers*/ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 17ffcb4..ab9ae04 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -668,6 +668,7 @@ int main(void) DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); + DEFINE(VCPU_SPRG9, offsetof(struct kvm_vcpu, arch.sprg9)); DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index e000b39..b4f8fba 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -398,6 +398,7 @@ _GLOBAL(kvmppc_resume_host) #ifdef CONFIG_64BIT PPC_LL r3, PACA_SPRG_VDSO(r13) #endif + mfspr r5, SPRN_SPRG9 PPC_STD(r6, VCPU_SHARED_SPRG4, r11) mfspr r8, SPRN_SPRG6 PPC_STD(r7, VCPU_SHARED_SPRG5, r11) @@ -405,6 +406,7 @@ _GLOBAL(kvmppc_resume_host) #ifdef CONFIG_64BIT mtspr SPRN_SPRG_VDSO_WRITE, r3 #endif + PPC_STD(r5, VCPU_SPRG9, r4) PPC_STD(r8, VCPU_SHARED_SPRG6, r11) mfxer r3 PPC_STD(r9, VCPU_SHARED_SPRG7, r11) @@ -639,7 +641,9 @@ lightweight_exit: mtspr SPRN_SPRG5W, r6 PPC_LD(r8, VCPU_SHARED_SPRG7, r11) mtspr SPRN_SPRG6W, r7 + PPC_LD(r5, VCPU_SPRG9, r4) mtspr SPRN_SPRG7W, r8 + mtspr SPRN_SPRG9, r5 /* Load some guest volatiles. 
*/ PPC_LL r3, VCPU_LR(r4) -- cgit v0.10.2 From 28d2f421bcff6781b80decac59da414f86f35c2a Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Fri, 25 Jul 2014 11:21:08 +0530 Subject: KVM: PPC: Booke-hv: Add one reg interface for SPRG9 We now support SPRG9 for the guest, so also add a one_reg interface for it. Note: the changes are in bookehv code only, as we do not have SPRG9 on booke-pr. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 2bc4a94..0e56d9e 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -555,6 +555,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8) #define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9) +#define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 690499d..164bad2 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -267,14 +267,32 @@ static int kvmppc_core_set_sregs_e500mc(struct kvm_vcpu *vcpu, static int kvmppc_get_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); + int r = 0; + + switch (id) { + case KVM_REG_PPC_SPRG9: + *val = get_reg_val(id, vcpu->arch.sprg9); + break; + default: + r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); + } + return r; } static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); + int r = 0; + + switch (id) { + case KVM_REG_PPC_SPRG9: + vcpu->arch.sprg9 = set_reg_val(id, *val); + break; + default: + r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); + } + return r; } -- cgit v0.10.2 From 8c95ead6039d46d2d5c375d3cadac8708121fbe4 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Fri, 25 Jul 2014 11:32:43 +0530 Subject: KVM: PPC: Remove comment saying SPRG1 is used for vcpu pointer Scott Wood pointed out that we are no longer using SPRG1 for the vcpu pointer, but are using SPRN_SPRG_THREAD <=> SPRG3 (thread->vcpu), so this comment is no longer valid. Note: SPRN_SPRG3R is not supported (we do not see any need for it as of now), and if we want to support it in the future then we would have to shift to using SPRG1 for the VCPU pointer. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c8f3381..0ef17ad 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -944,9 +944,6 @@ * readable variant for reads, which can avoid a fault * with KVM type virtualization. * - * (*) Under KVM, the host SPRG1 is used to point to - * the current VCPU data structure - * * 32-bit 8xx: * - SPRG0 scratch for exception vectors * - SPRG1 scratch for exception vectors -- cgit v0.10.2 From b2677b8dd8de0dc1496ede4da09b9dfd59f15cea Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 25 Jul 2014 10:38:59 +0200 Subject: KVM: PPC: Remove 440 support The 440 target hasn't been functioning properly for a few releases, and the fact that I was the only one to fix a very serious bug suggests that nobody was using it before either. Furthermore, KVM on 440 is slow to the point of being unusable. We don't have to carry along completely unused code. Remove 440 and give us one less thing to worry about. 
Signed-off-by: Alexander Graf diff --git a/Documentation/powerpc/00-INDEX b/Documentation/powerpc/00-INDEX index 6db73df..a68784d 100644 --- a/Documentation/powerpc/00-INDEX +++ b/Documentation/powerpc/00-INDEX @@ -17,8 +17,6 @@ firmware-assisted-dump.txt - Documentation on the firmware assisted dump mechanism "fadump". hvcs.txt - IBM "Hypervisor Virtual Console Server" Installation Guide -kvm_440.txt - - Various notes on the implementation of KVM for PowerPC 440. mpc52xx.txt - Linux 2.6.x on MPC52xx family pmu-ebb.txt diff --git a/Documentation/powerpc/kvm_440.txt b/Documentation/powerpc/kvm_440.txt deleted file mode 100644 index c02a003..0000000 --- a/Documentation/powerpc/kvm_440.txt +++ /dev/null @@ -1,41 +0,0 @@ -Hollis Blanchard -15 Apr 2008 - -Various notes on the implementation of KVM for PowerPC 440: - -To enforce isolation, host userspace, guest kernel, and guest userspace all -run at user privilege level. Only the host kernel runs in supervisor mode. -Executing privileged instructions in the guest traps into KVM (in the host -kernel), where we decode and emulate them. Through this technique, unmodified -440 Linux kernels can be run (slowly) as guests. Future performance work will -focus on reducing the overhead and frequency of these traps. - -The usual code flow is started from userspace invoking an "run" ioctl, which -causes KVM to switch into guest context. We use IVPR to hijack the host -interrupt vectors while running the guest, which allows us to direct all -interrupts to kvmppc_handle_interrupt(). At this point, we could either -- handle the interrupt completely (e.g. emulate "mtspr SPRG0"), or -- let the host interrupt handler run (e.g. when the decrementer fires), or -- return to host userspace (e.g. when the guest performs device MMIO) - -Address spaces: We take advantage of the fact that Linux doesn't use the AS=1 -address space (in host or guest), which gives us virtual address space to use -for guest mappings. While the guest is running, the host kernel remains mapped -in AS=0, but the guest can only use AS=1 mappings. - -TLB entries: The TLB entries covering the host linear mapping remain -present while running the guest. This reduces the overhead of lightweight -exits, which are handled by KVM running in the host kernel. We keep three -copies of the TLB: - - guest TLB: contents of the TLB as the guest sees it - - shadow TLB: the TLB that is actually in hardware while guest is running - - host TLB: to restore TLB state when context switching guest -> host -When a TLB miss occurs because a mapping was not present in the shadow TLB, -but was present in the guest TLB, KVM handles the fault without invoking the -guest. Large guest pages are backed by multiple 4KB shadow pages through this -mechanism. - -IO: MMIO and DCR accesses are emulated by userspace. We use virtio for network -and block IO, so those drivers must be enabled in the guest. It's possible -that some qemu device emulation (e.g. e1000 or rtl8139) may also work with -little effort. diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 790352f..93500f6 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -202,9 +202,7 @@ config PPC_EARLY_DEBUG_BEAT config PPC_EARLY_DEBUG_44x bool "Early serial debugging for IBM/AMCC 44x CPUs" - # PPC_EARLY_DEBUG on 440 leaves AS=1 mappings above the TLB high water - # mark, which doesn't work with current 440 KVM. 
- depends on 44x && !KVM + depends on 44x help Select this to enable early debugging for IBM 44x chips via the inbuilt serial port. If you enable this, ensure you set diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig index ccf66b9..924e10d 100644 --- a/arch/powerpc/configs/ppc44x_defconfig +++ b/arch/powerpc/configs/ppc44x_defconfig @@ -127,4 +127,3 @@ CONFIG_CRYPTO_PCBC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set CONFIG_VIRTUALIZATION=y -CONFIG_KVM_440=y diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h deleted file mode 100644 index a0e5761..0000000 --- a/arch/powerpc/include/asm/kvm_44x.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard - */ - -#ifndef __ASM_44X_H__ -#define __ASM_44X_H__ - -#include - -#define PPC44x_TLB_SIZE 64 - -/* If the guest is expecting it, this can be as large as we like; we'd just - * need to find some way of advertising it. */ -#define KVM44x_GUEST_TLB_SIZE 64 - -struct kvmppc_44x_tlbe { - u32 tid; /* Only the low 8 bits are used. */ - u32 word0; - u32 word1; - u32 word2; -}; - -struct kvmppc_44x_shadow_ref { - struct page *page; - u16 gtlb_index; - u8 writeable; - u8 tid; -}; - -struct kvmppc_vcpu_44x { - /* Unmodified copy of the guest's TLB. */ - struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE]; - - /* References to guest pages in the hardware TLB. */ - struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE]; - - /* State of the shadow TLB at guest context switch time. */ - struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE]; - u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; - - struct kvm_vcpu vcpu; -}; - -static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu) -{ - return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); -} - -void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); -void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); - -#endif /* __ASM_44X_H__ */ diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 3f3e530..b8901c4 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -33,7 +33,6 @@ /* IVPR must be 64KiB-aligned. 
*/ #define VCPU_SIZE_ORDER 4 #define VCPU_SIZE_LOG (VCPU_SIZE_ORDER + 12) -#define VCPU_TLB_PGSZ PPC44x_TLB_64K #define VCPU_SIZE_BYTES (1< #define KVM_ARCH_WANT_MMU_NOTIFIER @@ -62,8 +61,6 @@ extern int kvm_age_hva(struct kvm *kvm, unsigned long hva); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); -#endif - #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 #define HPTEG_HASH_BITS_PTE_LONG 12 diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c deleted file mode 100644 index 9cb4b0a..0000000 --- a/arch/powerpc/kvm/44x.c +++ /dev/null @@ -1,237 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "44x_tlb.h" -#include "booke.h" - -static void kvmppc_core_vcpu_load_44x(struct kvm_vcpu *vcpu, int cpu) -{ - kvmppc_booke_vcpu_load(vcpu, cpu); - kvmppc_44x_tlb_load(vcpu); -} - -static void kvmppc_core_vcpu_put_44x(struct kvm_vcpu *vcpu) -{ - kvmppc_44x_tlb_put(vcpu); - kvmppc_booke_vcpu_put(vcpu); -} - -int kvmppc_core_check_processor_compat(void) -{ - int r; - - if (strncmp(cur_cpu_spec->platform, "ppc440", 6) == 0) - r = 0; - else - r = -ENOTSUPP; - - return r; -} - -int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0]; - int i; - - tlbe->tid = 0; - tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; - tlbe->word1 = 0; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; - - tlbe++; - tlbe->tid = 0; - tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; - tlbe->word1 = 0xef600000; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR - | PPC44x_TLB_I | PPC44x_TLB_G; - - /* Since the guest can directly access the timebase, it must know the - * real timebase frequency. Accordingly, it must see the state of - * CCR1[TCS]. */ - /* XXX CCR1 doesn't exist on all 440 SoCs. */ - vcpu->arch.ccr1 = mfspr(SPRN_CCR1); - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) - vcpu_44x->shadow_refs[i].gtlb_index = -1; - - vcpu->arch.cpu_type = KVM_CPU_440; - vcpu->arch.pvr = mfspr(SPRN_PVR); - - return 0; -} - -/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ -int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, - struct kvm_translation *tr) -{ - int index; - gva_t eaddr; - u8 pid; - u8 as; - - eaddr = tr->linear_address; - pid = (tr->linear_address >> 32) & 0xff; - as = (tr->linear_address >> 40) & 0x1; - - index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); - if (index == -1) { - tr->valid = 0; - return 0; - } - - tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr); - /* XXX what does "writeable" and "usermode" even mean? 
*/ - tr->valid = 1; - - return 0; -} - -static int kvmppc_core_get_sregs_44x(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) -{ - return kvmppc_get_sregs_ivor(vcpu, sregs); -} - -static int kvmppc_core_set_sregs_44x(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) -{ - return kvmppc_set_sregs_ivor(vcpu, sregs); -} - -static int kvmppc_get_one_reg_44x(struct kvm_vcpu *vcpu, u64 id, - union kvmppc_one_reg *val) -{ - return -EINVAL; -} - -static int kvmppc_set_one_reg_44x(struct kvm_vcpu *vcpu, u64 id, - union kvmppc_one_reg *val) -{ - return -EINVAL; -} - -static struct kvm_vcpu *kvmppc_core_vcpu_create_44x(struct kvm *kvm, - unsigned int id) -{ - struct kvmppc_vcpu_44x *vcpu_44x; - struct kvm_vcpu *vcpu; - int err; - - vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu_44x) { - err = -ENOMEM; - goto out; - } - - vcpu = &vcpu_44x->vcpu; - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_vcpu; - - vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); - if (!vcpu->arch.shared) - goto uninit_vcpu; - - return vcpu; - -uninit_vcpu: - kvm_vcpu_uninit(vcpu); -free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu_44x); -out: - return ERR_PTR(err); -} - -static void kvmppc_core_vcpu_free_44x(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - - free_page((unsigned long)vcpu->arch.shared); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu_44x); -} - -static int kvmppc_core_init_vm_44x(struct kvm *kvm) -{ - return 0; -} - -static void kvmppc_core_destroy_vm_44x(struct kvm *kvm) -{ -} - -static struct kvmppc_ops kvm_ops_44x = { - .get_sregs = kvmppc_core_get_sregs_44x, - .set_sregs = kvmppc_core_set_sregs_44x, - .get_one_reg = kvmppc_get_one_reg_44x, - .set_one_reg = kvmppc_set_one_reg_44x, - .vcpu_load = kvmppc_core_vcpu_load_44x, - .vcpu_put = kvmppc_core_vcpu_put_44x, - .vcpu_create = kvmppc_core_vcpu_create_44x, - .vcpu_free = kvmppc_core_vcpu_free_44x, - .mmu_destroy = kvmppc_mmu_destroy_44x, - .init_vm = kvmppc_core_init_vm_44x, - .destroy_vm = kvmppc_core_destroy_vm_44x, - .emulate_op = kvmppc_core_emulate_op_44x, - .emulate_mtspr = kvmppc_core_emulate_mtspr_44x, - .emulate_mfspr = kvmppc_core_emulate_mfspr_44x, -}; - -static int __init kvmppc_44x_init(void) -{ - int r; - - r = kvmppc_booke_init(); - if (r) - goto err_out; - - r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); - if (r) - goto err_out; - kvm_ops_44x.owner = THIS_MODULE; - kvmppc_pr_ops = &kvm_ops_44x; - -err_out: - return r; -} - -static void __exit kvmppc_44x_exit(void) -{ - kvmppc_pr_ops = NULL; - kvmppc_booke_exit(); -} - -module_init(kvmppc_44x_init); -module_exit(kvmppc_44x_exit); -MODULE_ALIAS_MISCDEV(KVM_MINOR); -MODULE_ALIAS("devname:kvm"); diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c deleted file mode 100644 index 92c9ab4..0000000 --- a/arch/powerpc/kvm/44x_emulate.c +++ /dev/null @@ -1,194 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard - */ - -#include -#include -#include -#include -#include -#include "timing.h" - -#include "booke.h" -#include "44x_tlb.h" - -#define XOP_MFDCRX 259 -#define XOP_MFDCR 323 -#define XOP_MTDCRX 387 -#define XOP_MTDCR 451 -#define XOP_TLBSX 914 -#define XOP_ICCCI 966 -#define XOP_TLBWE 978 - -static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn) -{ - /* emulate some access in kernel */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs); - return EMULATE_DONE; - default: - vcpu->run->dcr.dcrn = dcrn; - vcpu->run->dcr.data = kvmppc_get_gpr(vcpu, rs); - vcpu->run->dcr.is_write = 1; - vcpu->arch.dcr_is_write = 1; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - return EMULATE_DO_DCR; - } -} - -static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn) -{ - /* The guest may access CPR0 registers to determine the timebase - * frequency, and it must know the real host frequency because it - * can directly access the timebase registers. - * - * It would be possible to emulate those accesses in userspace, - * but userspace can really only figure out the end frequency. - * We could decompose that into the factors that compute it, but - * that's tricky math, and it's easier to just report the real - * CPR0 values. - */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr); - break; - case DCRN_CPR0_CONFIG_DATA: - local_irq_disable(); - mtdcr(DCRN_CPR0_CONFIG_ADDR, - vcpu->arch.cpr0_cfgaddr); - kvmppc_set_gpr(vcpu, rt, - mfdcr(DCRN_CPR0_CONFIG_DATA)); - local_irq_enable(); - break; - default: - vcpu->run->dcr.dcrn = dcrn; - vcpu->run->dcr.data = 0; - vcpu->run->dcr.is_write = 0; - vcpu->arch.dcr_is_write = 0; - vcpu->arch.io_gpr = rt; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - return EMULATE_DO_DCR; - } - - return EMULATE_DONE; -} - -int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int inst, int *advance) -{ - int emulated = EMULATE_DONE; - int dcrn = get_dcrn(inst); - int ra = get_ra(inst); - int rb = get_rb(inst); - int rc = get_rc(inst); - int rs = get_rs(inst); - int rt = get_rt(inst); - int ws = get_ws(inst); - - switch (get_op(inst)) { - case 31: - switch (get_xop(inst)) { - - case XOP_MFDCR: - emulated = emulate_mfdcr(vcpu, rt, dcrn); - break; - - case XOP_MFDCRX: - emulated = emulate_mfdcr(vcpu, rt, - kvmppc_get_gpr(vcpu, ra)); - break; - - case XOP_MTDCR: - emulated = emulate_mtdcr(vcpu, rs, dcrn); - break; - - case XOP_MTDCRX: - emulated = emulate_mtdcr(vcpu, rs, - kvmppc_get_gpr(vcpu, ra)); - break; - - case XOP_TLBWE: - emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws); - break; - - case XOP_TLBSX: - emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc); - break; - - case XOP_ICCCI: - break; - - default: - emulated = EMULATE_FAIL; - } - - break; - - default: - emulated = EMULATE_FAIL; - } - - if (emulated == EMULATE_FAIL) - emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance); - - return emulated; -} - -int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) -{ - int emulated = EMULATE_DONE; - - switch (sprn) { - case SPRN_PID: - kvmppc_set_pid(vcpu, spr_val); break; - case 
SPRN_MMUCR: - vcpu->arch.mmucr = spr_val; break; - case SPRN_CCR0: - vcpu->arch.ccr0 = spr_val; break; - case SPRN_CCR1: - vcpu->arch.ccr1 = spr_val; break; - default: - emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val); - } - - return emulated; -} - -int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) -{ - int emulated = EMULATE_DONE; - - switch (sprn) { - case SPRN_PID: - *spr_val = vcpu->arch.pid; break; - case SPRN_MMUCR: - *spr_val = vcpu->arch.mmucr; break; - case SPRN_CCR0: - *spr_val = vcpu->arch.ccr0; break; - case SPRN_CCR1: - *spr_val = vcpu->arch.ccr1; break; - default: - emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val); - } - - return emulated; -} - diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c deleted file mode 100644 index 0deef10..0000000 --- a/arch/powerpc/kvm/44x_tlb.c +++ /dev/null @@ -1,528 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2007 - * - * Authors: Hollis Blanchard - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include "timing.h" - -#include "44x_tlb.h" -#include "trace.h" - -#ifndef PPC44x_TLBE_SIZE -#define PPC44x_TLBE_SIZE PPC44x_TLB_4K -#endif - -#define PAGE_SIZE_4K (1<<12) -#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1)) - -#define PPC44x_TLB_UATTR_MASK \ - (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3) -#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW) -#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW) - -#ifdef DEBUG -void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *tlbe; - int i; - - printk("vcpu %d TLB dump:\n", vcpu->vcpu_id); - printk("| %2s | %3s | %8s | %8s | %8s |\n", - "nr", "tid", "word0", "word1", "word2"); - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { - tlbe = &vcpu_44x->guest_tlb[i]; - if (tlbe->word0 & PPC44x_TLB_VALID) - printk(" G%2d | %02X | %08X | %08X | %08X |\n", - i, tlbe->tid, tlbe->word0, tlbe->word1, - tlbe->word2); - } -} -#endif - -static inline void kvmppc_44x_tlbie(unsigned int index) -{ - /* 0 <= index < 64, so the V bit is clear and we can use the index as - * word0. */ - asm volatile( - "tlbwe %[index], %[index], 0\n" - : - : [index] "r"(index) - ); -} - -static inline void kvmppc_44x_tlbre(unsigned int index, - struct kvmppc_44x_tlbe *tlbe) -{ - asm volatile( - "tlbre %[word0], %[index], 0\n" - "mfspr %[tid], %[sprn_mmucr]\n" - "andi. 
%[tid], %[tid], 0xff\n" - "tlbre %[word1], %[index], 1\n" - "tlbre %[word2], %[index], 2\n" - : [word0] "=r"(tlbe->word0), - [word1] "=r"(tlbe->word1), - [word2] "=r"(tlbe->word2), - [tid] "=r"(tlbe->tid) - : [index] "r"(index), - [sprn_mmucr] "i"(SPRN_MMUCR) - : "cc" - ); -} - -static inline void kvmppc_44x_tlbwe(unsigned int index, - struct kvmppc_44x_tlbe *stlbe) -{ - unsigned long tmp; - - asm volatile( - "mfspr %[tmp], %[sprn_mmucr]\n" - "rlwimi %[tmp], %[tid], 0, 0xff\n" - "mtspr %[sprn_mmucr], %[tmp]\n" - "tlbwe %[word0], %[index], 0\n" - "tlbwe %[word1], %[index], 1\n" - "tlbwe %[word2], %[index], 2\n" - : [tmp] "=&r"(tmp) - : [word0] "r"(stlbe->word0), - [word1] "r"(stlbe->word1), - [word2] "r"(stlbe->word2), - [tid] "r"(stlbe->tid), - [index] "r"(index), - [sprn_mmucr] "i"(SPRN_MMUCR) - ); -} - -static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) -{ - /* We only care about the guest's permission and user bits. */ - attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK; - - if (!usermode) { - /* Guest is in supervisor mode, so we need to translate guest - * supervisor permissions into user permissions. */ - attrib &= ~PPC44x_TLB_USER_PERM_MASK; - attrib |= (attrib & PPC44x_TLB_SUPER_PERM_MASK) << 3; - } - - /* Make sure host can always access this memory. */ - attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW; - - /* WIMGE = 0b00100 */ - attrib |= PPC44x_TLB_M; - - return attrib; -} - -/* Load shadow TLB back into hardware. */ -void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i <= tlb_44x_hwater; i++) { - struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; - - if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) - kvmppc_44x_tlbwe(i, stlbe); - } -} - -static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x, - unsigned int i) -{ - vcpu_44x->shadow_tlb_mod[i] = 1; -} - -/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */ -void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i <= tlb_44x_hwater; i++) { - struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; - - if (vcpu_44x->shadow_tlb_mod[i]) - kvmppc_44x_tlbre(i, stlbe); - - if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) - kvmppc_44x_tlbie(i); - } -} - - -/* Search the guest TLB for a matching entry. */ -int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, - unsigned int as) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - /* XXX Replace loop with fancy data structures. 
*/ - for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { - struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i]; - unsigned int tid; - - if (eaddr < get_tlb_eaddr(tlbe)) - continue; - - if (eaddr > get_tlb_end(tlbe)) - continue; - - tid = get_tlb_tid(tlbe); - if (tid && (tid != pid)) - continue; - - if (!get_tlb_v(tlbe)) - continue; - - if (get_tlb_ts(tlbe) != as) - continue; - - return i; - } - - return -1; -} - -gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, - gva_t eaddr) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; - unsigned int pgmask = get_tlb_bytes(gtlbe) - 1; - - return get_tlb_raddr(gtlbe) | (eaddr & pgmask); -} - -int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) -{ - unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); - - return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); -} - -int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) -{ - unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); - - return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); -} - -void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu) -{ -} - -void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu) -{ -} - -static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x, - unsigned int stlb_index) -{ - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index]; - - if (!ref->page) - return; - - /* Discard from the TLB. */ - /* Note: we could actually invalidate a host mapping, if the host overwrote - * this TLB entry since we inserted a guest mapping. */ - kvmppc_44x_tlbie(stlb_index); - - /* Now release the page. */ - if (ref->writeable) - kvm_release_page_dirty(ref->page); - else - kvm_release_page_clean(ref->page); - - ref->page = NULL; - - /* XXX set tlb_44x_index to stlb_index? */ - - trace_kvm_stlb_inval(stlb_index); -} - -void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i <= tlb_44x_hwater; i++) - kvmppc_44x_shadow_release(vcpu_44x, i); -} - -/** - * kvmppc_mmu_map -- create a host mapping for guest memory - * - * If the guest wanted a larger page than the host supports, only the first - * host page is mapped here and the rest are demand faulted. - * - * If the guest wanted a smaller page than the host page size, we map only the - * guest-size page (i.e. not a full host page mapping). - * - * Caller must ensure that the specified guest TLB entry is safe to insert into - * the shadow TLB. - */ -void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, - unsigned int gtlb_index) -{ - struct kvmppc_44x_tlbe stlbe; - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; - struct kvmppc_44x_shadow_ref *ref; - struct page *new_page; - hpa_t hpaddr; - gfn_t gfn; - u32 asid = gtlbe->tid; - u32 flags = gtlbe->word2; - u32 max_bytes = get_tlb_bytes(gtlbe); - unsigned int victim; - - /* Select TLB entry to clobber. Indirectly guard against races with the TLB - * miss handler by disabling interrupts. */ - local_irq_disable(); - victim = ++tlb_44x_index; - if (victim > tlb_44x_hwater) - victim = 0; - tlb_44x_index = victim; - local_irq_enable(); - - /* Get reference to new page. 
*/ - gfn = gpaddr >> PAGE_SHIFT; - new_page = gfn_to_page(vcpu->kvm, gfn); - if (is_error_page(new_page)) { - printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n", - (unsigned long long)gfn); - return; - } - hpaddr = page_to_phys(new_page); - - /* Invalidate any previous shadow mappings. */ - kvmppc_44x_shadow_release(vcpu_44x, victim); - - /* XXX Make sure (va, size) doesn't overlap any other - * entries. 440x6 user manual says the result would be - * "undefined." */ - - /* XXX what about AS? */ - - /* Force TS=1 for all guest mappings. */ - stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS; - - if (max_bytes >= PAGE_SIZE) { - /* Guest mapping is larger than or equal to host page size. We can use - * a "native" host mapping. */ - stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE; - } else { - /* Guest mapping is smaller than host page size. We must restrict the - * size of the mapping to be at most the smaller of the two, but for - * simplicity we fall back to a 4K mapping (this is probably what the - * guest is using anyways). */ - stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K; - - /* 'hpaddr' is a host page, which is larger than the mapping we're - * inserting here. To compensate, we must add the in-page offset to the - * sub-page. */ - hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K); - } - - stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); - stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags, - vcpu->arch.shared->msr & MSR_PR); - stlbe.tid = !(asid & 0xff); - - /* Keep track of the reference so we can properly release it later. */ - ref = &vcpu_44x->shadow_refs[victim]; - ref->page = new_page; - ref->gtlb_index = gtlb_index; - ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW); - ref->tid = stlbe.tid; - - /* Insert shadow mapping into hardware TLB. */ - kvmppc_44x_tlbe_set_modified(vcpu_44x, victim); - kvmppc_44x_tlbwe(victim, &stlbe); - trace_kvm_stlb_write(victim, stlbe.tid, stlbe.word0, stlbe.word1, - stlbe.word2); -} - -/* For a particular guest TLB entry, invalidate the corresponding host TLB - * mappings and release the host pages. */ -static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu, - unsigned int gtlb_index) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; - if (ref->gtlb_index == gtlb_index) - kvmppc_44x_shadow_release(vcpu_44x, i); - } -} - -void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) -{ - int usermode = vcpu->arch.shared->msr & MSR_PR; - - vcpu->arch.shadow_pid = !usermode; -} - -void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - if (unlikely(vcpu->arch.pid == new_pid)) - return; - - vcpu->arch.pid = new_pid; - - /* Guest userspace runs with TID=0 mappings and PID=0, to make sure it - * can't access guest kernel mappings (TID=1). When we switch to a new - * guest PID, which will also use host PID=0, we must discard the old guest - * userspace mappings. */ - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; - - if (ref->tid == 0) - kvmppc_44x_shadow_release(vcpu_44x, i); - } -} - -static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, - const struct kvmppc_44x_tlbe *tlbe) -{ - gpa_t gpa; - - if (!get_tlb_v(tlbe)) - return 0; - - /* Does it match current guest AS? */ - /* XXX what about IS != DS? 
*/ - if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS)) - return 0; - - gpa = get_tlb_raddr(tlbe); - if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) - /* Mapping is not for RAM. */ - return 0; - - return 1; -} - -int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *tlbe; - unsigned int gtlb_index; - int idx; - - gtlb_index = kvmppc_get_gpr(vcpu, ra); - if (gtlb_index >= KVM44x_GUEST_TLB_SIZE) { - printk("%s: index %d\n", __func__, gtlb_index); - kvmppc_dump_vcpu(vcpu); - return EMULATE_FAIL; - } - - tlbe = &vcpu_44x->guest_tlb[gtlb_index]; - - /* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */ - if (tlbe->word0 & PPC44x_TLB_VALID) - kvmppc_44x_invalidate(vcpu, gtlb_index); - - switch (ws) { - case PPC44x_TLB_PAGEID: - tlbe->tid = get_mmucr_stid(vcpu); - tlbe->word0 = kvmppc_get_gpr(vcpu, rs); - break; - - case PPC44x_TLB_XLAT: - tlbe->word1 = kvmppc_get_gpr(vcpu, rs); - break; - - case PPC44x_TLB_ATTRIB: - tlbe->word2 = kvmppc_get_gpr(vcpu, rs); - break; - - default: - return EMULATE_FAIL; - } - - idx = srcu_read_lock(&vcpu->kvm->srcu); - - if (tlbe_is_host_safe(vcpu, tlbe)) { - gva_t eaddr; - gpa_t gpaddr; - u32 bytes; - - eaddr = get_tlb_eaddr(tlbe); - gpaddr = get_tlb_raddr(tlbe); - - /* Use the advertised page size to mask effective and real addrs. */ - bytes = get_tlb_bytes(tlbe); - eaddr &= ~(bytes - 1); - gpaddr &= ~(bytes - 1); - - kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index); - } - - srcu_read_unlock(&vcpu->kvm->srcu, idx); - - trace_kvm_gtlb_write(gtlb_index, tlbe->tid, tlbe->word0, tlbe->word1, - tlbe->word2); - - kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); - return EMULATE_DONE; -} - -int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc) -{ - u32 ea; - int gtlb_index; - unsigned int as = get_mmucr_sts(vcpu); - unsigned int pid = get_mmucr_stid(vcpu); - - ea = kvmppc_get_gpr(vcpu, rb); - if (ra) - ea += kvmppc_get_gpr(vcpu, ra); - - gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); - if (rc) { - u32 cr = kvmppc_get_cr(vcpu); - - if (gtlb_index < 0) - kvmppc_set_cr(vcpu, cr & ~0x20000000); - else - kvmppc_set_cr(vcpu, cr | 0x20000000); - } - kvmppc_set_gpr(vcpu, rt, gtlb_index); - - kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); - return EMULATE_DONE; -} diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h deleted file mode 100644 index a9ff80e..0000000 --- a/arch/powerpc/kvm/44x_tlb.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 
2007 - * - * Authors: Hollis Blanchard - */ - -#ifndef __KVM_POWERPC_TLB_H__ -#define __KVM_POWERPC_TLB_H__ - -#include -#include - -extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, - unsigned int pid, unsigned int as); - -extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, - u8 rc); -extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws); - -/* TLB helper functions */ -static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe) -{ - return (tlbe->word0 >> 4) & 0xf; -} - -static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe) -{ - return tlbe->word0 & 0xfffffc00; -} - -static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe) -{ - unsigned int pgsize = get_tlb_size(tlbe); - return 1 << 10 << (pgsize << 1); -} - -static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe) -{ - return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1; -} - -static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe) -{ - u64 word1 = tlbe->word1; - return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00); -} - -static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe) -{ - return tlbe->tid & 0xff; -} - -static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe) -{ - return (tlbe->word0 >> 8) & 0x1; -} - -static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe) -{ - return (tlbe->word0 >> 9) & 0x1; -} - -static inline unsigned int get_mmucr_stid(const struct kvm_vcpu *vcpu) -{ - return vcpu->arch.mmucr & 0xff; -} - -static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu) -{ - return (vcpu->arch.mmucr >> 16) & 0x1; -} - -#endif /* __KVM_POWERPC_TLB_H__ */ diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 8aeeda1..8f104a6 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -112,23 +112,9 @@ config KVM_BOOK3S_64_PR config KVM_BOOKE_HV bool -config KVM_440 - bool "KVM support for PowerPC 440 processors" - depends on 44x - select KVM - select KVM_MMIO - ---help--- - Support running unmodified 440 guest kernels in virtual machines on - 440 host processors. - - This module provides access to the hardware capabilities through - a character device node named /dev/kvm. - - If unsure, say N. - config KVM_EXIT_TIMING bool "Detailed exit timing" - depends on KVM_440 || KVM_E500V2 || KVM_E500MC + depends on KVM_E500V2 || KVM_E500MC ---help--- Calculate elapsed time for every exit/enter cycle. A per-vcpu report is available in debugfs kvm/vm#_vcpu#_timing. diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index ce569b6..777f894 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -10,7 +10,6 @@ KVM := ../../../virt/kvm common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ $(KVM)/eventfd.o -CFLAGS_44x_tlb.o := -I. CFLAGS_e500_mmu.o := -I. CFLAGS_e500_mmu_host.o := -I. CFLAGS_emulate.o := -I. 
@@ -21,16 +20,6 @@ obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o AFLAGS_booke_interrupts.o := -I$(obj) -kvm-440-objs := \ - $(common-objs-y) \ - booke.o \ - booke_emulate.o \ - booke_interrupts.o \ - 44x.o \ - 44x_tlb.o \ - 44x_emulate.o -kvm-objs-$(CONFIG_KVM_440) := $(kvm-440-objs) - kvm-e500-objs := \ $(common-objs-y) \ booke.o \ @@ -127,7 +116,6 @@ kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o kvm-objs := $(kvm-objs-m) $(kvm-objs-y) -obj-$(CONFIG_KVM_440) += kvm.o obj-$(CONFIG_KVM_E500V2) += kvm.o obj-$(CONFIG_KVM_E500MC) += kvm.o obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index b632cd3..f753543 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -99,13 +99,6 @@ enum int_class { void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type); -extern void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu); -extern int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int inst, int *advance); -extern int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, - ulong spr_val); -extern int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, - ulong *spr_val); extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu); extern int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 2c6deb5ef..84c308a 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -21,7 +21,6 @@ #include #include #include -#include #include #include @@ -424,10 +423,6 @@ lightweight_exit: mtspr SPRN_PID1, r3 #endif -#ifdef CONFIG_44x - iccci 0, 0 /* XXX hack */ -#endif - /* Load some guest volatiles. */ lwz r0, VCPU_GPR(R0)(r4) lwz r2, VCPU_GPR(R2)(r4) diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index b4f8fba..e9fa56a 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index cfa6cfa..8e03568 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -217,7 +217,6 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) case KVM_HCALL_TOKEN(KVM_HC_FEATURES): r = EV_SUCCESS; #if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2) - /* XXX Missing magic page on 44x */ r2 |= (1 << KVM_FEATURE_MAGIC_PAGE); #endif -- cgit v0.10.2 From a0840240c0c6bcbac8f0f5db11f95c19aaf9b52f Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Sat, 19 Jul 2014 17:59:34 +1000 Subject: KVM: PPC: Book3S: Fix LPCR one_reg interface Unfortunately, the LPCR got defined as a 32-bit register in the one_reg interface. This is unfortunate because KVM allows userspace to control the DPFD (default prefetch depth) field, which is in the upper 32 bits. The result is that DPFD always get set to 0, which reduces performance in the guest. We can't just change KVM_REG_PPC_LPCR to be a 64-bit register ID, since that would break existing userspace binaries. Instead we define a new KVM_REG_PPC_LPCR_64 id which is 64-bit. Userspace can still use the old KVM_REG_PPC_LPCR id, but it now only modifies those fields in the bottom 32 bits that userspace can modify (ILE, TC and AIL). If userspace uses the new KVM_REG_PPC_LPCR_64 id, it can modify DPFD as well. 
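For illustration only (this is not part of the patch), userspace could program DPFD through the new id roughly as in the sketch below; the vcpu file descriptor, the DPFD field mask and the new DPFD value are assumptions of the example:

    /* Illustrative only. Assumes an open vcpu fd, a DPFD field mask and a
     * new DPFD value supplied by the caller. */
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int set_lpcr_dpfd(int vcpu_fd, uint64_t dpfd_mask, uint64_t dpfd)
    {
            uint64_t lpcr;
            struct kvm_one_reg reg = {
                    .id   = KVM_REG_PPC_LPCR_64,   /* 64-bit id: upper half visible */
                    .addr = (uintptr_t)&lpcr,
            };

            /* read the full 64-bit LPCR, including DPFD in the upper 32 bits */
            if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
                    return -1;

            /* update only the DPFD field */
            lpcr = (lpcr & ~dpfd_mask) | dpfd;

            /* write it back; the old 32-bit KVM_REG_PPC_LPCR id cannot change DPFD */
            return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
    }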
Signed-off-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras Cc: stable@vger.kernel.org Signed-off-by: Alexander Graf diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 6955318..884f819 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1869,7 +1869,8 @@ registers, find a list below: PPC | KVM_REG_PPC_PID | 64 PPC | KVM_REG_PPC_ACOP | 64 PPC | KVM_REG_PPC_VRSAVE | 32 - PPC | KVM_REG_PPC_LPCR | 64 + PPC | KVM_REG_PPC_LPCR | 32 + PPC | KVM_REG_PPC_LPCR_64 | 64 PPC | KVM_REG_PPC_PPR | 64 PPC | KVM_REG_PPC_ARCH_COMPAT 32 PPC | KVM_REG_PPC_DABRX | 32 diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 0e56d9e..e0e49db 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -548,6 +548,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_VRSAVE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4) #define KVM_REG_PPC_LPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5) +#define KVM_REG_PPC_LPCR_64 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb5) #define KVM_REG_PPC_PPR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb6) /* Architecture compatibility level */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index f1281c4..0c5266e 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -863,7 +863,8 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, return 0; } -static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr) +static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, + bool preserve_top32) { struct kvmppc_vcore *vc = vcpu->arch.vcore; u64 mask; @@ -898,6 +899,10 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr) mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; if (cpu_has_feature(CPU_FTR_ARCH_207S)) mask |= LPCR_AIL; + + /* Broken 32-bit version of LPCR must not clear top bits */ + if (preserve_top32) + mask &= 0xFFFFFFFF; vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); spin_unlock(&vc->lock); } @@ -1011,6 +1016,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.vcore->tb_offset); break; case KVM_REG_PPC_LPCR: + case KVM_REG_PPC_LPCR_64: *val = get_reg_val(id, vcpu->arch.vcore->lpcr); break; case KVM_REG_PPC_PPR: @@ -1216,7 +1222,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, ALIGN(set_reg_val(id, *val), 1UL << 24); break; case KVM_REG_PPC_LPCR: - kvmppc_set_lpcr(vcpu, set_reg_val(id, *val)); + kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true); + break; + case KVM_REG_PPC_LPCR_64: + kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), false); break; case KVM_REG_PPC_PPR: vcpu->arch.ppr = set_reg_val(id, *val); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index b18f2d4..e7a1fa2 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1314,6 +1314,7 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, to_book3s(vcpu)->hior); break; case KVM_REG_PPC_LPCR: + case KVM_REG_PPC_LPCR_64: /* * We are only interested in the LPCR_ILE bit */ @@ -1349,6 +1350,7 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, to_book3s(vcpu)->hior_explicit = true; break; case KVM_REG_PPC_LPCR: + case KVM_REG_PPC_LPCR_64: kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val)); break; default: -- cgit v0.10.2 From ef1af2e29622ff3403926ae801a2b10da075a2de Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 19 Jul 2014 17:59:35 +1000 Subject: 
KVM: PPC: Book3S PR: Take SRCU read lock around RTAS kvm_read_guest() call This does for PR KVM what c9438092cae4 ("KVM: PPC: Book3S HV: Take SRCU read lock around kvm_read_guest() call") did for HV KVM, that is, eliminate a "suspicious rcu_dereference_check() usage!" warning by taking the SRCU lock around the call to kvmppc_rtas_hcall(). It also fixes a return of RESUME_HOST to return EMULATE_FAIL instead, since kvmppc_h_pr() is supposed to return EMULATE_* values. Signed-off-by: Paul Mackerras Cc: stable@vger.kernel.org Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index 6d0143f..ce3c893 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -267,6 +267,8 @@ static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) { + int rc, idx; + if (cmd <= MAX_HCALL_OPCODE && !test_bit(cmd/4, vcpu->kvm->arch.enabled_hcalls)) return EMULATE_FAIL; @@ -299,8 +301,11 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) break; case H_RTAS: if (list_empty(&vcpu->kvm->arch.rtas_tokens)) - return RESUME_HOST; - if (kvmppc_rtas_hcall(vcpu)) + break; + idx = srcu_read_lock(&vcpu->kvm->srcu); + rc = kvmppc_rtas_hcall(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (rc) break; kvmppc_set_gpr(vcpu, 3, 0); return EMULATE_DONE; -- cgit v0.10.2 From 1b2e33b071b13980a1f0823fbf139242059697b4 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 19 Jul 2014 17:59:36 +1000 Subject: KVM: PPC: Book3S: Make kvmppc_ld return a more accurate error indication At present, kvmppc_ld calls kvmppc_xlate, and if kvmppc_xlate returns any error indication, it returns -ENOENT, which is taken to mean an HPTE not found error. However, the error could have been a segment found (no SLB entry) or a permission error. Similarly, kvmppc_pte_to_hva currently does permission checking, but any error from it is taken by kvmppc_ld to mean that the access is an emulated MMIO access. Also, kvmppc_ld does no execute permission checking. This fixes these problems by (a) returning any error from kvmppc_xlate directly, (b) moving the permission check from kvmppc_pte_to_hva into kvmppc_ld, and (c) adding an execute permission check to kvmppc_ld. This is similar to what was done for kvmppc_st() by commit 82ff911317c3 ("KVM: PPC: Deflect page write faults properly in kvmppc_st"). 
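For illustration only (the caller below is hypothetical), the new contract means a user of kvmppc_ld() has to tell negative errno values apart from the EMULATE_* results:

  /* Sketch only: a hypothetical caller fetching a guest instruction. */
  static int fetch_guest_inst(struct kvm_vcpu *vcpu, ulong pc, u32 *inst)
  {
          ulong ea = pc;
          int ret;

          ret = kvmppc_ld(vcpu, &ea, sizeof(*inst), inst, false);
          switch (ret) {
          case EMULATE_DONE:      /* *inst is valid */
                  return 0;
          case EMULATE_DO_MMIO:   /* translated, but not backed by guest memory */
                  return -EFAULT;
          case -EPERM:            /* no read permission on the mapping */
          case -ENOEXEC:          /* fetch from a no-execute mapping */
          default:                /* other errors passed up from kvmppc_xlate() */
                  return ret;
          }
  }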
Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 37ca8a0..a3cbada 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -413,17 +413,10 @@ static hva_t kvmppc_bad_hva(void) return PAGE_OFFSET; } -static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte, - bool read) +static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) { hva_t hpage; - if (read && !pte->may_read) - goto err; - - if (!read && !pte->may_write) - goto err; - hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); if (kvm_is_error_hva(hpage)) goto err; @@ -462,15 +455,23 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, { struct kvmppc_pte pte; hva_t hva = *eaddr; + int rc; vcpu->stat.ld++; - if (kvmppc_xlate(vcpu, *eaddr, data, false, &pte)) - goto nopte; + rc = kvmppc_xlate(vcpu, *eaddr, data, false, &pte); + if (rc) + return rc; *eaddr = pte.raddr; - hva = kvmppc_pte_to_hva(vcpu, &pte, true); + if (!pte.may_read) + return -EPERM; + + if (!data && !pte.may_execute) + return -ENOEXEC; + + hva = kvmppc_pte_to_hva(vcpu, &pte); if (kvm_is_error_hva(hva)) goto mmio; @@ -481,8 +482,6 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, return EMULATE_DONE; -nopte: - return -ENOENT; mmio: return EMULATE_DO_MMIO; } -- cgit v0.10.2 From de9bdd1a604d30b4e05dc18b5cc6354949253abd Mon Sep 17 00:00:00 2001 From: Stewart Smith Date: Fri, 18 Jul 2014 14:18:42 +1000 Subject: Split out struct kvmppc_vcore creation to separate function No code changes, just split it out to a function so that with the addition of micro partition prefetch buffer allocation (in subsequent patch) looks neater and doesn't require excessive indentation. 
Signed-off-by: Stewart Smith Acked-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 0c5266e..5042ccc 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1303,6 +1303,26 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, return r; } +static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) +{ + struct kvmppc_vcore *vcore; + + vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL); + + if (vcore == NULL) + return NULL; + + INIT_LIST_HEAD(&vcore->runnable_threads); + spin_lock_init(&vcore->lock); + init_waitqueue_head(&vcore->wq); + vcore->preempt_tb = TB_NIL; + vcore->lpcr = kvm->arch.lpcr; + vcore->first_vcpuid = core * threads_per_subcore; + vcore->kvm = kvm; + + return vcore; +} + static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, unsigned int id) { @@ -1354,16 +1374,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, mutex_lock(&kvm->lock); vcore = kvm->arch.vcores[core]; if (!vcore) { - vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL); - if (vcore) { - INIT_LIST_HEAD(&vcore->runnable_threads); - spin_lock_init(&vcore->lock); - init_waitqueue_head(&vcore->wq); - vcore->preempt_tb = TB_NIL; - vcore->lpcr = kvm->arch.lpcr; - vcore->first_vcpuid = core * threads_per_subcore; - vcore->kvm = kvm; - } + vcore = kvmppc_vcore_create(kvm, core); kvm->arch.vcores[core] = vcore; kvm->arch.online_vcores++; } -- cgit v0.10.2 From 9678cdaae93932473f696fdea5debf3eee1e1260 Mon Sep 17 00:00:00 2001 From: Stewart Smith Date: Fri, 18 Jul 2014 14:18:43 +1000 Subject: Use the POWER8 Micro Partition Prefetch Engine in KVM HV on POWER8 The POWER8 processor has a Micro Partition Prefetch Engine, which is a fancy way of saying "has way to store and load contents of L2 or L2+MRU way of L3 cache". We initiate the storing of the log (list of addresses) using the logmpp instruction and start restore by writing to a SPR. The logmpp instruction takes parameters in a single 64bit register: - starting address of the table to store log of L2/L2+L3 cache contents - 32kb for L2 - 128kb for L2+L3 - Aligned relative to maximum size of the table (32kb or 128kb) - Log control (no-op, L2 only, L2 and L3, abort logout) We should abort any ongoing logging before initiating one. To initiate restore, we write to the MPPR SPR. The format of what to write to the SPR is similar to the logmpp instruction parameter: - starting address of the table to read from (same alignment requirements) - table size (no data, until end of table) - prefetch rate (from fastest possible to slower. about every 8, 16, 24 or 32 cycles) The idea behind loading and storing the contents of L2/L3 cache is to reduce memory latency in a system that is frequently swapping vcores on a physical CPU. The best case scenario for doing this is when some vcores are doing very cache heavy workloads. The worst case is when they have about 0 cache hits, so we just generate needless memory operations. This implementation just does L2 store/load. In my benchmarks this proves to be useful. Benchmark 1: - 16 core POWER8 - 3x Ubuntu 14.04LTS guests (LE) with 8 VCPUs each - No split core/SMT - two guests running sysbench memory test. 
sysbench --test=memory --num-threads=8 run - one guest running apache bench (of default HTML page) ab -n 490000 -c 400 http://localhost/ This benchmark aims to measure performance of real world application (apache) where other guests are cache hot with their own workloads. The sysbench memory benchmark does pointer sized writes to a (small) memory buffer in a loop. In this benchmark with this patch I can see an improvement both in requests per second (~5%) and in mean and median response times (again, about 5%). The spread of minimum and maximum response times were largely unchanged. benchmark 2: - Same VM config as benchmark 1 - all three guests running sysbench memory benchmark This benchmark aims to see if there is a positive or negative affect to this cache heavy benchmark. Although due to the nature of the benchmark (stores) we may not see a difference in performance, but rather hopefully an improvement in consistency of performance (when vcore switched in, don't have to wait many times for cachelines to be pulled in) The results of this benchmark are improvements in consistency of performance rather than performance itself. With this patch, the few outliers in duration go away and we get more consistent performance in each guest. benchmark 3: - same 3 guests and CPU configuration as benchmark 1 and 2. - two idle guests - 1 guest running STREAM benchmark This scenario also saw performance improvement with this patch. On Copy and Scale workloads from STREAM, I got 5-6% improvement with this patch. For Add and triad, it was around 10% (or more). benchmark 4: - same 3 guests as previous benchmarks - two guests running sysbench --memory, distinctly different cache heavy workload - one guest running STREAM benchmark. Similar improvements to benchmark 3. benchmark 5: - 1 guest, 8 VCPUs, Ubuntu 14.04 - Host configured with split core (SMT8, subcores-per-core=4) - STREAM benchmark In this benchmark, we see a 10-20% performance improvement across the board of STREAM benchmark results with this patch. Based on preliminary investigation and microbenchmarks by Prerna Saxena Signed-off-by: Stewart Smith Acked-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index ed0afc1..34a05a1 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -3,6 +3,7 @@ #ifdef __KERNEL__ +#include /* bytes per L1 cache line */ #if defined(CONFIG_8xx) || defined(CONFIG_403GCX) @@ -39,6 +40,12 @@ struct ppc64_caches { }; extern struct ppc64_caches ppc64_caches; + +static inline void logmpp(u64 x) +{ + asm volatile(PPC_LOGMPP(R1) : : "r" (x)); +} + #endif /* __powerpc64__ && ! 
__ASSEMBLY__ */ #if defined(__ASSEMBLY__) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 5fe2b5d..11385bb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -307,6 +307,8 @@ struct kvmppc_vcore { u32 arch_compat; ulong pcr; ulong dpdes; /* doorbell state (POWER8) */ + void *mpp_buffer; /* Micro Partition Prefetch buffer */ + bool mpp_buffer_is_valid; }; #define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 3132bb9..c636841 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -139,6 +139,7 @@ #define PPC_INST_ISEL 0x7c00001e #define PPC_INST_ISEL_MASK 0xfc00003e #define PPC_INST_LDARX 0x7c0000a8 +#define PPC_INST_LOGMPP 0x7c0007e4 #define PPC_INST_LSWI 0x7c0004aa #define PPC_INST_LSWX 0x7c00042a #define PPC_INST_LWARX 0x7c000028 @@ -275,6 +276,20 @@ #define __PPC_EH(eh) 0 #endif +/* POWER8 Micro Partition Prefetch (MPP) parameters */ +/* Address mask is common for LOGMPP instruction and MPPR SPR */ +#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000 + +/* Bits 60 and 61 of MPP SPR should be set to one of the following */ +/* Aborting the fetch is indeed setting 00 in the table size bits */ +#define PPC_MPPR_FETCH_ABORT (0x0ULL << 60) +#define PPC_MPPR_FETCH_WHOLE_TABLE (0x2ULL << 60) + +/* Bits 54 and 55 of register for LOGMPP instruction should be set to: */ +#define PPC_LOGMPP_LOG_L2 (0x02ULL << 54) +#define PPC_LOGMPP_LOG_L2L3 (0x01ULL << 54) +#define PPC_LOGMPP_LOG_ABORT (0x03ULL << 54) + /* Deal with instructions that older assemblers aren't aware of */ #define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \ __PPC_RA(a) | __PPC_RB(b)) @@ -283,6 +298,8 @@ #define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LDARX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b) | __PPC_EH(eh)) +#define PPC_LOGMPP(b) stringify_in_c(.long PPC_INST_LOGMPP | \ + __PPC_RB(b)) #define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LWARX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b) | __PPC_EH(eh)) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 0ef17ad..c547b26 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -225,6 +225,7 @@ #define CTRL_TE 0x00c00000 /* thread enable */ #define CTRL_RUNLATCH 0x1 #define SPRN_DAWR 0xB4 +#define SPRN_MPPR 0xB8 /* Micro Partition Prefetch Register */ #define SPRN_RPR 0xBA /* Relative Priority Register */ #define SPRN_CIABR 0xBB #define CIABR_PRIV 0x3 diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 5042ccc..c470d55 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -69,6 +70,13 @@ static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); +#if defined(CONFIG_PPC_64K_PAGES) +#define MPP_BUFFER_ORDER 0 +#elif defined(CONFIG_PPC_4K_PAGES) +#define MPP_BUFFER_ORDER 3 +#endif + + static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -1320,6 +1328,13 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) vcore->first_vcpuid = core * threads_per_subcore; vcore->kvm = kvm; + vcore->mpp_buffer_is_valid = false; + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + vcore->mpp_buffer = (void *)__get_free_pages( + GFP_KERNEL|__GFP_ZERO, + 
MPP_BUFFER_ORDER); + return vcore; } @@ -1586,6 +1601,33 @@ static int on_primary_thread(void) return 1; } +static void kvmppc_start_saving_l2_cache(struct kvmppc_vcore *vc) +{ + phys_addr_t phy_addr, mpp_addr; + + phy_addr = (phys_addr_t)virt_to_phys(vc->mpp_buffer); + mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK; + + mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_ABORT); + logmpp(mpp_addr | PPC_LOGMPP_LOG_L2); + + vc->mpp_buffer_is_valid = true; +} + +static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc) +{ + phys_addr_t phy_addr, mpp_addr; + + phy_addr = virt_to_phys(vc->mpp_buffer); + mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK; + + /* We must abort any in-progress save operations to ensure + * the table is valid so that prefetch engine knows when to + * stop prefetching. */ + logmpp(mpp_addr | PPC_LOGMPP_LOG_ABORT); + mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE); +} + /* * Run a set of guest threads on a physical core. * Called with vc->lock held. @@ -1663,9 +1705,16 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) srcu_idx = srcu_read_lock(&vc->kvm->srcu); + if (vc->mpp_buffer_is_valid) + kvmppc_start_restoring_l2_cache(vc); + __kvmppc_vcore_entry(); spin_lock(&vc->lock); + + if (vc->mpp_buffer) + kvmppc_start_saving_l2_cache(vc); + /* disable sending of IPIs on virtual external irqs */ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) vcpu->cpu = -1; @@ -2413,8 +2462,14 @@ static void kvmppc_free_vcores(struct kvm *kvm) { long int i; - for (i = 0; i < KVM_MAX_VCORES; ++i) + for (i = 0; i < KVM_MAX_VCORES; ++i) { + if (kvm->arch.vcores[i] && kvm->arch.vcores[i]->mpp_buffer) { + struct kvmppc_vcore *vc = kvm->arch.vcores[i]; + free_pages((unsigned long)vc->mpp_buffer, + MPP_BUFFER_ORDER); + } kfree(kvm->arch.vcores[i]); + } kvm->arch.online_vcores = 0; } -- cgit v0.10.2 From 784aa3d7fb6f729c06d5836c9d9569f58e4d05ae Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 14 Jul 2014 18:27:35 +0200 Subject: KVM: Rename and add argument to check_extension In preparation to make the check_extension function available to VM scope we add a struct kvm * argument to the function header and rename the function accordingly. It will still be called from the /dev/kvm fd, but with a NULL argument for struct kvm *. 
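As a sketch of what the new argument buys (the capability and helper below are made up purely for illustration), an architecture handler can now answer differently depending on whether a VM is known:

  /* Sketch only: KVM_CAP_FOO and vm_supports_foo() are hypothetical. */
  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
  {
          int r = 0;

          switch (ext) {
          case KVM_CAP_FOO:
                  if (!kvm)
                          r = 1;                     /* system ioctl: best guess */
                  else
                          r = vm_supports_foo(kvm);  /* per-VM answer */
                  break;
          }
          return r;
  }

The follow-up patches use exactly this on PPC to report PR- and HV-specific capabilities.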
Signed-off-by: Alexander Graf Acked-by: Paolo Bonzini diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 3c82b37..cb77f999 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -184,7 +184,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) } } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; switch (ext) { diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 6a4309b..0729ba6 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -190,7 +190,7 @@ void kvm_arch_check_processor_compat(void *rtn) *(int *)rtn = 0; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index d687c6e..3ca79aa 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -885,7 +885,7 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8e03568..d870bac 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -391,7 +391,7 @@ void kvm_arch_sync_events(struct kvm *kvm) { } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; /* FIXME!! diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2f3e14f..00268ca 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -146,7 +146,7 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5a8691b..5a62d91 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2616,7 +2616,7 @@ out: return r; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ec4e3bd..5065b95 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -602,7 +602,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf); -int kvm_dev_ioctl_check_extension(long ext); +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, int *is_dirty); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4b6c01b..e28f3ca 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2571,7 +2571,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return r; } -static long kvm_dev_ioctl_check_extension_generic(long arg) +static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) { switch (arg) { case KVM_CAP_USER_MEMORY: @@ -2595,7 +2595,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) default: break; } - return kvm_dev_ioctl_check_extension(arg); + return kvm_vm_ioctl_check_extension(kvm, arg); } static long kvm_dev_ioctl(struct file *filp, @@ -2614,7 +2614,7 @@ static long kvm_dev_ioctl(struct file *filp, r = kvm_dev_ioctl_create_vm(arg); break; case KVM_CHECK_EXTENSION: - r = kvm_dev_ioctl_check_extension_generic(arg); + r = kvm_vm_ioctl_check_extension_generic(NULL, arg); break; case 
KVM_GET_VCPU_MMAP_SIZE: r = -EINVAL; -- cgit v0.10.2 From 92b591a4c46b103ebd3fc0d03a084e1efd331253 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 14 Jul 2014 18:33:08 +0200 Subject: KVM: Allow KVM_CHECK_EXTENSION on the vm fd The KVM_CHECK_EXTENSION is only available on the kvm fd today. Unfortunately on PPC some of the capabilities change depending on the way a VM was created. So instead we need a way to expose capabilities as VM ioctl, so that we can see which VM type we're using (HV or PR). To enable this, add the KVM_CHECK_EXTENSION ioctl to our vm ioctl portfolio. Signed-off-by: Alexander Graf Acked-by: Paolo Bonzini diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 884f819..8898caf 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -148,9 +148,9 @@ of banks, as set via the KVM_X86_SETUP_MCE ioctl. 4.4 KVM_CHECK_EXTENSION -Capability: basic +Capability: basic, KVM_CAP_CHECK_EXTENSION_VM for vm ioctl Architectures: all -Type: system ioctl +Type: system ioctl, vm ioctl Parameters: extension identifier (KVM_CAP_*) Returns: 0 if unsupported; 1 (or some other positive integer) if supported @@ -160,6 +160,9 @@ receives an integer that describes the extension availability. Generally 0 means no and 1 means yes, but some extensions may report additional information in the integer return value. +Based on their initialization different VMs may have different capabilities. +It is thus encouraged to use the vm ioctl to query for capabilities (available +with KVM_CAP_CHECK_EXTENSION_VM on the vm fd) 4.5 KVM_GET_VCPU_MMAP_SIZE diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0418b74..51776ca 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -759,6 +759,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_ARM_PSCI_0_2 102 #define KVM_CAP_PPC_FIXUP_HCALL 103 #define KVM_CAP_PPC_ENABLE_HCALL 104 +#define KVM_CAP_CHECK_EXTENSION_VM 105 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e28f3ca..1b95cc9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2324,6 +2324,34 @@ static int kvm_ioctl_create_device(struct kvm *kvm, return 0; } +static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) +{ + switch (arg) { + case KVM_CAP_USER_MEMORY: + case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: + case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: +#ifdef CONFIG_KVM_APIC_ARCHITECTURE + case KVM_CAP_SET_BOOT_CPU_ID: +#endif + case KVM_CAP_INTERNAL_ERROR_DATA: +#ifdef CONFIG_HAVE_KVM_MSI + case KVM_CAP_SIGNAL_MSI: +#endif +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_CAP_IRQFD_RESAMPLE: +#endif + case KVM_CAP_CHECK_EXTENSION_VM: + return 1; +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_CAP_IRQ_ROUTING: + return KVM_MAX_IRQ_ROUTES; +#endif + default: + break; + } + return kvm_vm_ioctl_check_extension(kvm, arg); +} + static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2487,6 +2515,9 @@ static long kvm_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_CHECK_EXTENSION: + r = kvm_vm_ioctl_check_extension_generic(kvm, arg); + break; default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); if (r == -ENOTTY) @@ -2571,33 +2602,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return r; } -static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) -{ - switch (arg) { - case KVM_CAP_USER_MEMORY: - case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: - case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: -#ifdef 
CONFIG_KVM_APIC_ARCHITECTURE - case KVM_CAP_SET_BOOT_CPU_ID: -#endif - case KVM_CAP_INTERNAL_ERROR_DATA: -#ifdef CONFIG_HAVE_KVM_MSI - case KVM_CAP_SIGNAL_MSI: -#endif -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - case KVM_CAP_IRQFD_RESAMPLE: -#endif - return 1; -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - case KVM_CAP_IRQ_ROUTING: - return KVM_MAX_IRQ_ROUTES; -#endif - default: - break; - } - return kvm_vm_ioctl_check_extension(kvm, arg); -} - static long kvm_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { -- cgit v0.10.2 From 7a58777a33128b28379851b94070934b17ed9176 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 14 Jul 2014 18:55:19 +0200 Subject: KVM: PPC: Book3S: Provide different CAPs based on HV or PR mode With Book3S KVM we can create both PR and HV VMs in parallel on the same machine. That gives us new challenges on the CAPs we return - both have different capabilities. When we get asked about CAPs on the kvm fd, there's nothing we can do. We can try to be smart and assume we're running HV if HV is available, PR otherwise. However with the newly added VM CHECK_EXTENSION we can now ask for capabilities directly on a VM which knows whether it's PR or HV. With this patch I can successfully expose KVM PVINFO data to user space in the PR case, fixing magic page mapping for PAPR guests. Signed-off-by: Alexander Graf Acked-by: Paolo Bonzini diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index d870bac..eaa57da 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -394,11 +394,17 @@ void kvm_arch_sync_events(struct kvm *kvm) int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; - /* FIXME!! - * Should some of this be vm ioctl ? is it possible now ? - */ + /* Assume we're using HV mode when the HV module is loaded */ int hv_enabled = kvmppc_hv_ops ? 1 : 0; + if (kvm) { + /* + * Hooray - we know which VM type we're running on. Depend on + * that rather than the guess above. + */ + hv_enabled = is_kvmppc_hv_enabled(kvm); + } + switch (ext) { #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_SREGS: -- cgit v0.10.2 From 63fff5c1e3695ff5c9ad8c198a3b03ca4c0c73a8 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 29 Jun 2014 16:47:30 +0530 Subject: KVM: PPC: BOOK3S: HV: Update compute_tlbie_rb to handle 16MB base page When calculating the lower bits of AVA field, use the shift count based on the base page size. Also add the missing segment size and remove stale comment. Signed-off-by: Aneesh Kumar K.V Acked-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index e504f88..07cf9df 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -147,6 +147,8 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, */ /* This covers 14..54 bits of va*/ rb = (v & ~0x7fUL) << 16; /* AVA field */ + + rb |= v >> (62 - 8); /* B field */ /* * AVA in v had cleared lower 23 bits. 
We need to derive * that from pteg index @@ -177,10 +179,10 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, { int aval_shift; /* - * remaining 7bits of AVA/LP fields + * remaining bits of AVA/LP fields * Also contain the rr bits of LP */ - rb |= (va_low & 0x7f) << 16; + rb |= (va_low << mmu_psize_defs[b_psize].shift) & 0x7ff000; /* * Now clear not needed LP bits based on actual psize */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c470d55..27cced9 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2064,12 +2064,6 @@ static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps, (*sps)->page_shift = def->shift; (*sps)->slb_enc = def->sllp; (*sps)->enc[0].page_shift = def->shift; - /* - * Only return base page encoding. We don't want to return - * all the supporting pte_enc, because our H_ENTER doesn't - * support MPSS yet. Once they do, we can start passing all - * support pte_enc here - */ (*sps)->enc[0].pte_enc = def->penc[linux_psize]; /* * Add 16MB MPSS support if host supports it -- cgit v0.10.2 From 7d15c06f1abfe4b893c6c2c8a306b02210a6a6db Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 20 Jun 2014 13:52:36 +0200 Subject: KVM: PPC: Implement kvmppc_xlate for all targets We have a nice API to find the translated GPAs of a GVA including protection flags. So far we only use it on Book3S, but there's no reason the same shouldn't be used on BookE as well. Implement a kvmppc_xlate() version for BookE and clean it up to make it more readable in general. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index e381363..1a60af9 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -52,6 +52,16 @@ enum instruction_type { INST_SC, /* system call */ }; +enum xlate_instdata { + XLATE_INST, /* translate instruction address */ + XLATE_DATA /* translate data address */ +}; + +enum xlate_readwrite { + XLATE_READ, /* check for read permissions */ + XLATE_WRITE /* check for write permissions */ +}; + extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern void kvmppc_handler_highmem(void); @@ -94,6 +104,9 @@ extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, gva_t eaddr); extern void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu); +extern int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, + enum xlate_instdata xlid, enum xlate_readwrite xlrw, + struct kvmppc_pte *pte); extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index a3cbada..0b6c84e 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -380,9 +380,11 @@ pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, } EXPORT_SYMBOL_GPL(kvmppc_gpa_to_pfn); -static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, - bool iswrite, struct kvmppc_pte *pte) +int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid, + enum xlate_readwrite xlrw, struct kvmppc_pte *pte) { + bool data = (xlid == XLATE_DATA); + bool iswrite = (xlrw == XLATE_WRITE); int relocated = (kvmppc_get_msr(vcpu) & (data ? 
MSR_DR : MSR_IR)); int r; @@ -434,7 +436,8 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, vcpu->stat.st++; - r = kvmppc_xlate(vcpu, *eaddr, data, true, &pte); + r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, + XLATE_WRITE, &pte); if (r < 0) return r; @@ -459,7 +462,8 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, vcpu->stat.ld++; - rc = kvmppc_xlate(vcpu, *eaddr, data, false, &pte); + rc = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, + XLATE_READ, &pte); if (rc) return rc; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 97bcde2..2f697b4 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1785,6 +1785,57 @@ void kvm_guest_protect_msr(struct kvm_vcpu *vcpu, ulong prot_bitmap, bool set) #endif } +int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid, + enum xlate_readwrite xlrw, struct kvmppc_pte *pte) +{ + int gtlb_index; + gpa_t gpaddr; + +#ifdef CONFIG_KVM_E500V2 + if (!(vcpu->arch.shared->msr & MSR_PR) && + (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) { + pte->eaddr = eaddr; + pte->raddr = (vcpu->arch.magic_page_pa & PAGE_MASK) | + (eaddr & ~PAGE_MASK); + pte->vpage = eaddr >> PAGE_SHIFT; + pte->may_read = true; + pte->may_write = true; + pte->may_execute = true; + + return 0; + } +#endif + + /* Check the guest TLB. */ + switch (xlid) { + case XLATE_INST: + gtlb_index = kvmppc_mmu_itlb_index(vcpu, eaddr); + break; + case XLATE_DATA: + gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr); + break; + default: + BUG(); + } + + /* Do we have a TLB entry at all? */ + if (gtlb_index < 0) + return -ENOENT; + + gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr); + + pte->eaddr = eaddr; + pte->raddr = (gpaddr & PAGE_MASK) | (eaddr & ~PAGE_MASK); + pte->vpage = eaddr >> PAGE_SHIFT; + + /* XXX read permissions from the guest TLB */ + pte->may_read = true; + pte->may_write = true; + pte->may_execute = true; + + return 0; +} + int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { -- cgit v0.10.2 From 35c4a7330dbe1ae6f590a5645b185e35ddb3f6d9 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 20 Jun 2014 13:58:16 +0200 Subject: KVM: PPC: Move kvmppc_ld/st to common code We have enough common infrastructure now to resolve GVA->GPA mappings at runtime. With this we can move our book3s specific helpers to load / store in guest virtual address space to common code as well. 
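A sketch of the kind of caller this enables (the helper below is hypothetical): with the helpers in common code, any subarch can store through a guest virtual address:

  /* Sketch only: write a 32-bit value at a guest virtual address. */
  static int write_guest_u32(struct kvm_vcpu *vcpu, ulong gva, u32 val)
  {
          ulong ea = gva;

          /* data == true: translate via the data relocation path; returns
           * EMULATE_DONE, EMULATE_DO_MMIO or a negative translation error. */
          return kvmppc_st(vcpu, &ea, sizeof(val), &val, true);
  }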
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index a86ca65..172fd6d 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -148,8 +148,8 @@ extern void kvmppc_mmu_hpte_sysexit(void); extern int kvmppc_mmu_hv_init(void); extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc); +/* XXX remove this export when load_last_inst() is generic */ extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); -extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 11385bb..66f5b59 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -111,15 +111,15 @@ struct kvm_vcpu_stat { u32 halt_wakeup; u32 dbell_exits; u32 gdbell_exits; + u32 ld; + u32 st; #ifdef CONFIG_PPC_BOOK3S u32 pf_storage; u32 pf_instruc; u32 sp_storage; u32 sp_instruc; u32 queue_intr; - u32 ld; u32 ld_slow; - u32 st; u32 st_slow; #endif }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 1a60af9..17fa277 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -80,6 +80,10 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, u32 *inst); +extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data); +extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data); extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 0b6c84e..de8da33 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -410,87 +410,6 @@ int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid, return r; } -static hva_t kvmppc_bad_hva(void) -{ - return PAGE_OFFSET; -} - -static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) -{ - hva_t hpage; - - hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); - if (kvm_is_error_hva(hpage)) - goto err; - - return hpage | (pte->raddr & ~PAGE_MASK); -err: - return kvmppc_bad_hva(); -} - -int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, - bool data) -{ - struct kvmppc_pte pte; - int r; - - vcpu->stat.st++; - - r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, - XLATE_WRITE, &pte); - if (r < 0) - return r; - - *eaddr = pte.raddr; - - if (!pte.may_write) - return -EPERM; - - if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) - return EMULATE_DO_MMIO; - - return EMULATE_DONE; -} -EXPORT_SYMBOL_GPL(kvmppc_st); - -int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, - bool data) -{ - struct kvmppc_pte pte; - hva_t hva = *eaddr; - int rc; - - vcpu->stat.ld++; - - rc = kvmppc_xlate(vcpu, *eaddr, data ? 
XLATE_DATA : XLATE_INST, - XLATE_READ, &pte); - if (rc) - return rc; - - *eaddr = pte.raddr; - - if (!pte.may_read) - return -EPERM; - - if (!data && !pte.may_execute) - return -ENOEXEC; - - hva = kvmppc_pte_to_hva(vcpu, &pte); - if (kvm_is_error_hva(hva)) - goto mmio; - - if (copy_from_user(ptr, (void __user *)hva, size)) { - printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); - goto mmio; - } - - return EMULATE_DONE; - -mmio: - return EMULATE_DO_MMIO; -} -EXPORT_SYMBOL_GPL(kvmppc_ld); - int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, u32 *inst) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index eaa57da..2c5a1c3 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -309,6 +309,87 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio); +static hva_t kvmppc_bad_hva(void) +{ + return PAGE_OFFSET; +} + +static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) +{ + hva_t hpage; + + hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); + if (kvm_is_error_hva(hpage)) + goto err; + + return hpage | (pte->raddr & ~PAGE_MASK); +err: + return kvmppc_bad_hva(); +} + +int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data) +{ + struct kvmppc_pte pte; + int r; + + vcpu->stat.st++; + + r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, + XLATE_WRITE, &pte); + if (r < 0) + return r; + + *eaddr = pte.raddr; + + if (!pte.may_write) + return -EPERM; + + if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) + return EMULATE_DO_MMIO; + + return EMULATE_DONE; +} +EXPORT_SYMBOL_GPL(kvmppc_st); + +int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data) +{ + struct kvmppc_pte pte; + hva_t hva = *eaddr; + int rc; + + vcpu->stat.ld++; + + rc = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, + XLATE_READ, &pte); + if (rc) + return rc; + + *eaddr = pte.raddr; + + if (!pte.may_read) + return -EPERM; + + if (!data && !pte.may_execute) + return -ENOEXEC; + + hva = kvmppc_pte_to_hva(vcpu, &pte); + if (kvm_is_error_hva(hva)) + goto mmio; + + if (copy_from_user(ptr, (void __user *)hva, size)) { + printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); + goto mmio; + } + + return EMULATE_DONE; + +mmio: + return EMULATE_DO_MMIO; +} +EXPORT_SYMBOL_GPL(kvmppc_ld); + int kvm_arch_hardware_enable(void *garbage) { return 0; -- cgit v0.10.2 From 9897e88a79e1c6a3c5bbe74eccd64c4ba32a3b19 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 20 Jun 2014 13:59:43 +0200 Subject: KVM: PPC: Remove kvmppc_bad_hva() We have a proper define for invalid HVA numbers. Use those instead of the ppc specific kvmppc_bad_hva(). 
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2c5a1c3..3d59730 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -309,11 +309,6 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio); -static hva_t kvmppc_bad_hva(void) -{ - return PAGE_OFFSET; -} - static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) { hva_t hpage; @@ -324,7 +319,7 @@ static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) return hpage | (pte->raddr & ~PAGE_MASK); err: - return kvmppc_bad_hva(); + return KVM_HVA_ERR_BAD; } int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, -- cgit v0.10.2 From c45c551403f0a7b152e56c53735b954faa36c54c Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 20 Jun 2014 14:17:30 +0200 Subject: KVM: PPC: Use kvm_read_guest in kvmppc_ld We have a nice and handy helper to read from guest physical address space, so we should make use of it in kvmppc_ld as we already do for its counterpart in kvmppc_st. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3d59730..be40886 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -309,19 +309,6 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio); -static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) -{ - hva_t hpage; - - hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); - if (kvm_is_error_hva(hpage)) - goto err; - - return hpage | (pte->raddr & ~PAGE_MASK); -err: - return KVM_HVA_ERR_BAD; -} - int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) { @@ -351,7 +338,6 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) { struct kvmppc_pte pte; - hva_t hva = *eaddr; int rc; vcpu->stat.ld++; @@ -369,19 +355,10 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, if (!data && !pte.may_execute) return -ENOEXEC; - hva = kvmppc_pte_to_hva(vcpu, &pte); - if (kvm_is_error_hva(hva)) - goto mmio; - - if (copy_from_user(ptr, (void __user *)hva, size)) { - printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); - goto mmio; - } + if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size)) + return EMULATE_DO_MMIO; return EMULATE_DONE; - -mmio: - return EMULATE_DO_MMIO; } EXPORT_SYMBOL_GPL(kvmppc_ld); -- cgit v0.10.2 From c12fb43c2f6d6a57a4e21afe74ff56485d699ee7 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 20 Jun 2014 14:43:36 +0200 Subject: KVM: PPC: Handle magic page in kvmppc_ld/st We use kvmppc_ld and kvmppc_st to emulate load/store instructions that may as well access the magic page. Special case it out so that we can properly access it. 
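Condensed to its core (a sketch; the hunks below add this on both the load and the store path), the override checks whether the translated address falls inside the magic page and, if so, copies against the shared-register page directly:

  /* Sketch of the check added below, load direction shown. */
  ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK;

  if (kvmppc_supports_magic_page(vcpu) && mp_pa &&
      ((pte.raddr & KVM_PAM & PAGE_MASK) == mp_pa) &&
      !(kvmppc_get_msr(vcpu) & MSR_PR)) {
          void *magic = vcpu->arch.shared;

          magic += pte.eaddr & 0xfff;
          memcpy(ptr, magic, size);   /* the store path copies the other way */
          return EMULATE_DONE;
  }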
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 172fd6d..6166791 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -286,6 +286,13 @@ static inline bool is_kvmppc_resume_guest(int r) return (r == RESUME_GUEST || r == RESUME_GUEST_NV); } +static inline bool is_kvmppc_hv_enabled(struct kvm *kvm); +static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu) +{ + /* Only PR KVM supports the magic page */ + return !is_kvmppc_hv_enabled(vcpu->kvm); +} + /* Magic register values loaded into r3 and r4 before the 'sc' assembly * instruction for the OSI hypercalls */ #define OSI_SC_MAGIC_R3 0x113724FA diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index cbb1990..f7aa5cc 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -103,4 +103,14 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) { return vcpu->arch.fault_dear; } + +static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu) +{ + /* Magic page is only supported on e500v2 */ +#ifdef CONFIG_KVM_E500V2 + return true; +#else + return false; +#endif +} #endif /* __ASM_KVM_BOOKE_H__ */ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index be40886..544d1d3 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -312,6 +312,7 @@ EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio); int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) { + ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK; struct kvmppc_pte pte; int r; @@ -327,6 +328,16 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, if (!pte.may_write) return -EPERM; + /* Magic page override */ + if (kvmppc_supports_magic_page(vcpu) && mp_pa && + ((pte.raddr & KVM_PAM & PAGE_MASK) == mp_pa) && + !(kvmppc_get_msr(vcpu) & MSR_PR)) { + void *magic = vcpu->arch.shared; + magic += pte.eaddr & 0xfff; + memcpy(magic, ptr, size); + return EMULATE_DONE; + } + if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) return EMULATE_DO_MMIO; @@ -337,6 +348,7 @@ EXPORT_SYMBOL_GPL(kvmppc_st); int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) { + ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK; struct kvmppc_pte pte; int rc; @@ -355,6 +367,16 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, if (!data && !pte.may_execute) return -ENOEXEC; + /* Magic page override */ + if (kvmppc_supports_magic_page(vcpu) && mp_pa && + ((pte.raddr & KVM_PAM & PAGE_MASK) == mp_pa) && + !(kvmppc_get_msr(vcpu) & MSR_PR)) { + void *magic = vcpu->arch.shared; + magic += pte.eaddr & 0xfff; + memcpy(ptr, magic, size); + return EMULATE_DONE; + } + if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size)) return EMULATE_DO_MMIO; -- cgit v0.10.2 From d69614a295aef72f8fb22da8e3ccf1a8f19a7ffc Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 18 Jun 2014 14:53:49 +0200 Subject: KVM: PPC: Separate loadstore emulation from priv emulation Today the instruction emulator can get called via 2 separate code paths. It can either be called by MMIO emulation detection code or by privileged instruction traps. This is bad, as both code paths prepare the environment differently. For MMIO emulation we already know the virtual address we faulted on, so instructions there don't have to actually fetch that information. 
Split out the two separate use cases into separate files. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 17fa277..2214ee6 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -86,6 +86,7 @@ extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); +extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 777f894..1ccd7a1 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -13,8 +13,9 @@ common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ CFLAGS_e500_mmu.o := -I. CFLAGS_e500_mmu_host.o := -I. CFLAGS_emulate.o := -I. +CFLAGS_emulate_loadstore.o := -I. -common-objs-y += powerpc.o emulate.o +common-objs-y += powerpc.o emulate.o emulate_loadstore.o obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o @@ -91,6 +92,7 @@ kvm-book3s_64-module-objs += \ $(KVM)/eventfd.o \ powerpc.o \ emulate.o \ + emulate_loadstore.o \ book3s.o \ book3s_64_vio.o \ book3s_rtas.o \ diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index c5c64b6..e96b50d 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -207,25 +207,12 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) return emulated; } -/* XXX to do: - * lhax - * lhaux - * lswx - * lswi - * stswx - * stswi - * lha - * lhau - * lmw - * stmw - * - */ /* XXX Should probably auto-generate instruction decoding for a particular core * from opcode tables in the future. 
*/ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) { u32 inst; - int ra, rs, rt, sprn; + int rs, rt, sprn; enum emulation_result emulated; int advance = 1; @@ -238,7 +225,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) pr_debug("Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); - ra = get_ra(inst); rs = get_rs(inst); rt = get_rt(inst); sprn = get_sprn(inst); @@ -270,200 +256,24 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) #endif advance = 0; break; - case OP_31_XOP_LWZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - break; - - case OP_31_XOP_LBZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - break; - - case OP_31_XOP_LBZUX: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_STWX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 1); - break; - - case OP_31_XOP_STBX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - break; - - case OP_31_XOP_STBUX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_LHAX: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - break; - - case OP_31_XOP_LHZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - break; - - case OP_31_XOP_LHZUX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; case OP_31_XOP_MFSPR: emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt); break; - case OP_31_XOP_STHX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - break; - - case OP_31_XOP_STHUX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - case OP_31_XOP_MTSPR: emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); break; - case OP_31_XOP_DCBST: - case OP_31_XOP_DCBF: - case OP_31_XOP_DCBI: - /* Do nothing. The guest is performing dcbi because - * hardware DMA is not snooped by the dcache, but - * emulated DMA either goes through the dcache as - * normal writes, or the host kernel has handled dcache - * coherence. */ - break; - - case OP_31_XOP_LWBRX: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); - break; - case OP_31_XOP_TLBSYNC: break; - case OP_31_XOP_STWBRX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 0); - break; - - case OP_31_XOP_LHBRX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); - break; - - case OP_31_XOP_STHBRX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 0); - break; - default: /* Attempt core-specific emulation below. */ emulated = EMULATE_FAIL; } break; - case OP_LWZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - break; - - /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. 
*/ - case OP_LD: - rt = get_rt(inst); - emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); - break; - - case OP_LWZU: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LBZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - break; - - case OP_LBZU: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STW: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 1); - break; - - /* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */ - case OP_STD: - rs = get_rs(inst); - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 8, 1); - break; - - case OP_STWU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STB: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - break; - - case OP_STBU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LHZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - break; - - case OP_LHZU: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LHA: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - break; - - case OP_LHAU: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STH: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - break; - - case OP_STHU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - default: emulated = EMULATE_FAIL; } diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c new file mode 100644 index 0000000..0de4ffa --- /dev/null +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -0,0 +1,272 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * Copyright 2011 Freescale Semiconductor, Inc. 
+ * + * Authors: Hollis Blanchard + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include "timing.h" +#include "trace.h" + +/* XXX to do: + * lhax + * lhaux + * lswx + * lswi + * stswx + * stswi + * lha + * lhau + * lmw + * stmw + * + */ +int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 inst; + int ra, rs, rt; + enum emulation_result emulated; + int advance = 1; + + /* this default type might be overwritten by subcategories */ + kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); + + emulated = kvmppc_get_last_inst(vcpu, false, &inst); + if (emulated != EMULATE_DONE) + return emulated; + + ra = get_ra(inst); + rs = get_rs(inst); + rt = get_rt(inst); + + switch (get_op(inst)) { + case 31: + switch (get_xop(inst)) { + case OP_31_XOP_LWZX: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + break; + + case OP_31_XOP_LBZX: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + break; + + case OP_31_XOP_LBZUX: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_STWX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 1); + break; + + case OP_31_XOP_STBX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + break; + + case OP_31_XOP_STBUX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_LHAX: + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + break; + + case OP_31_XOP_LHZX: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + break; + + case OP_31_XOP_LHZUX: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_STHX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + break; + + case OP_31_XOP_STHUX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_DCBST: + case OP_31_XOP_DCBF: + case OP_31_XOP_DCBI: + /* Do nothing. The guest is performing dcbi because + * hardware DMA is not snooped by the dcache, but + * emulated DMA either goes through the dcache as + * normal writes, or the host kernel has handled dcache + * coherence. */ + break; + + case OP_31_XOP_LWBRX: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); + break; + + case OP_31_XOP_STWBRX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 0); + break; + + case OP_31_XOP_LHBRX: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); + break; + + case OP_31_XOP_STHBRX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 0); + break; + + default: + emulated = EMULATE_FAIL; + break; + } + break; + + case OP_LWZ: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + break; + + /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. 
*/ + case OP_LD: + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); + break; + + case OP_LWZU: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_LBZ: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + break; + + case OP_LBZU: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_STW: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 1); + break; + + /* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */ + case OP_STD: + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 8, 1); + break; + + case OP_STWU: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_STB: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + break; + + case OP_STBU: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_LHZ: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + break; + + case OP_LHZU: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_LHA: + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + break; + + case OP_LHAU: + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_STH: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + break; + + case OP_STHU: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + default: + emulated = EMULATE_FAIL; + break; + } + + if (emulated == EMULATE_FAIL) { + advance = 0; + kvmppc_core_queue_program(vcpu, 0); + } + + trace_kvm_ppc_instr(inst, kvmppc_get_pc(vcpu), emulated); + + /* Advance past emulated instruction. */ + if (advance) + kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); + + return emulated; +} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 544d1d3..c14ed15 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -272,7 +272,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) enum emulation_result er; int r; - er = kvmppc_emulate_instruction(run, vcpu); + er = kvmppc_emulate_loadstore(vcpu); switch (er) { case EMULATE_DONE: /* Future optimization: only reload non-volatiles if they were -- cgit v0.10.2 From 8de12015ff75967b16f70e5938b151390dac9b77 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 18 Jun 2014 21:56:55 +0200 Subject: KVM: PPC: Expose helper functions for data/inst faults We're going to implement guest code interpretation in KVM for some rare corner cases. This code needs to be able to inject data and instruction faults into the guest when it encounters them. Expose generic APIs to do this in a reasonably subarch agnostic fashion. 
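As a rough illustration (not part of the patch itself), an interpreter that fails to complete a guest data access could reflect the fault back into the guest with these helpers. The reflect_data_fault() wrapper and its page_found/fault_flags parameters are made up for the example; only the kvmppc_core_queue_* calls are the APIs exposed here:

static void reflect_data_fault(struct kvm_vcpu *vcpu, ulong eaddr,
			       bool page_found, ulong fault_flags)
{
	/* illustrative sketch only, not kernel code */
	if (!page_found)
		/* no translation at all: raise a DTLB miss (booke flavour) */
		kvmppc_core_queue_dtlb_miss(vcpu, eaddr, fault_flags);
	else
		/* a translation exists but the access is not permitted */
		kvmppc_core_queue_data_storage(vcpu, eaddr, fault_flags);
}

An instruction-side failure would use kvmppc_core_queue_inst_storage() (or kvmppc_core_queue_itlb_miss() on booke) in the same way.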
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 2214ee6..cbee453 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -132,6 +132,14 @@ extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, ulong dear_flags, + ulong esr_flags); +extern void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, + ulong dear_flags, + ulong esr_flags); +extern void kvmppc_core_queue_itlb_miss(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, + ulong esr_flags); extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index de8da33..dd03f6b 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -230,6 +230,23 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); } +void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar, + ulong flags) +{ + kvmppc_set_dar(vcpu, dar); + kvmppc_set_dsisr(vcpu, flags); + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); +} + +void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags) +{ + u64 msr = kvmppc_get_msr(vcpu); + msr &= ~(SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT); + msr |= flags & (SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT); + kvmppc_set_msr_fast(vcpu, msr); + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); +} + int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) { int deliver = 1; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 2f697b4..f30948a 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -185,24 +185,28 @@ static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, set_bit(priority, &vcpu->arch.pending_exceptions); } -static void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, - ulong dear_flags, ulong esr_flags) +void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, + ulong dear_flags, ulong esr_flags) { vcpu->arch.queued_dear = dear_flags; vcpu->arch.queued_esr = esr_flags; kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); } -static void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, - ulong dear_flags, ulong esr_flags) +void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, + ulong dear_flags, ulong esr_flags) { vcpu->arch.queued_dear = dear_flags; vcpu->arch.queued_esr = esr_flags; kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE); } -static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, - ulong esr_flags) +void kvmppc_core_queue_itlb_miss(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS); +} + +void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong esr_flags) { vcpu->arch.queued_esr = esr_flags; kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); -- cgit v0.10.2 From ce91ddc471b77ec75e5b2a43c803efac605f37b3 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 28 Jul 2014 19:29:13 +0200 Subject: KVM: PPC: Remove DCR handling DCR handling was only needed for 440 KVM. 
Since we removed it, we can also remove handling of DCR accesses. Signed-off-by: Alexander Graf diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 8898caf..a21ff22 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2613,8 +2613,8 @@ The 'data' member contains, in its first 'len' bytes, the value as it would appear if the VCPU performed a load or store of the appropriate width directly to the byte array. -NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_DCR, - KVM_EXIT_PAPR and KVM_EXIT_EPR the corresponding +NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI KVM_EXIT_PAPR and + KVM_EXIT_EPR the corresponding operations are complete (and guest state is consistent) only after userspace has re-entered the kernel with KVM_RUN. The kernel side will first finish incomplete operations and then check for pending signals. Userspace @@ -2685,7 +2685,7 @@ Principles of Operation Book in the Chapter for Dynamic Address Translation __u8 is_write; } dcr; -powerpc specific. +Deprecated - was used for 440 KVM. /* KVM_EXIT_OSI */ struct { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 66f5b59..98d9dd5 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -94,7 +94,6 @@ struct kvm_vm_stat { struct kvm_vcpu_stat { u32 sum_exits; u32 mmio_exits; - u32 dcr_exits; u32 signal_exits; u32 light_exits; /* Account for special types of light exits: */ @@ -126,7 +125,6 @@ struct kvm_vcpu_stat { enum kvm_exit_types { MMIO_EXITS, - DCR_EXITS, SIGNAL_EXITS, ITLB_REAL_MISS_EXITS, ITLB_VIRT_MISS_EXITS, @@ -601,8 +599,6 @@ struct kvm_vcpu_arch { u8 io_gpr; /* GPR used as IO source/target */ u8 mmio_is_bigendian; u8 mmio_sign_extend; - u8 dcr_needed; - u8 dcr_is_write; u8 osi_needed; u8 osi_enabled; u8 papr_enabled; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index cbee453..8e36c1e 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -41,7 +41,6 @@ enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ - EMULATE_DO_DCR, /* kvm_run filled with DCR request */ EMULATE_FAIL, /* can't emulate this instruction */ EMULATE_AGAIN, /* something went wrong. 
go again */ EMULATE_EXIT_USER, /* emulation requires exit to user-space */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index f30948a..b4c89fa 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -51,7 +51,6 @@ unsigned long kvmppc_booke_handlers; struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmio", VCPU_STAT(mmio_exits) }, - { "dcr", VCPU_STAT(dcr_exits) }, { "sig", VCPU_STAT(signal_exits) }, { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, @@ -709,10 +708,6 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) case EMULATE_AGAIN: return RESUME_GUEST; - case EMULATE_DO_DCR: - run->exit_reason = KVM_EXIT_DCR; - return RESUME_HOST; - case EMULATE_FAIL: printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", __func__, vcpu->arch.pc, vcpu->arch.last_inst); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index c14ed15..288b4bb 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -743,12 +743,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) #endif } -static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, - struct kvm_run *run) -{ - kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data); -} - static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { @@ -945,10 +939,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (!vcpu->mmio_is_write) kvmppc_complete_mmio_load(vcpu, run); vcpu->mmio_needed = 0; - } else if (vcpu->arch.dcr_needed) { - if (!vcpu->arch.dcr_is_write) - kvmppc_complete_dcr_load(vcpu, run); - vcpu->arch.dcr_needed = 0; } else if (vcpu->arch.osi_needed) { u64 *gprs = run->osi.gprs; int i; diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index 07b6110..e44d2b2 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -110,7 +110,6 @@ void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = { [MMIO_EXITS] = "MMIO", - [DCR_EXITS] = "DCR", [SIGNAL_EXITS] = "SIGNAL", [ITLB_REAL_MISS_EXITS] = "ITLBREAL", [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT", diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h index bf191e7..3123690 100644 --- a/arch/powerpc/kvm/timing.h +++ b/arch/powerpc/kvm/timing.h @@ -63,9 +63,6 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type) case EMULATED_INST_EXITS: vcpu->stat.emulated_inst_exits++; break; - case DCR_EXITS: - vcpu->stat.dcr_exits++; - break; case DSI_EXITS: vcpu->stat.dsi_exits++; break; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 51776ca..f6f24ae 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -162,7 +162,7 @@ struct kvm_pit_config { #define KVM_EXIT_TPR_ACCESS 12 #define KVM_EXIT_S390_SIEIC 13 #define KVM_EXIT_S390_RESET 14 -#define KVM_EXIT_DCR 15 +#define KVM_EXIT_DCR 15 /* deprecated */ #define KVM_EXIT_NMI 16 #define KVM_EXIT_INTERNAL_ERROR 17 #define KVM_EXIT_OSI 18 @@ -268,7 +268,7 @@ struct kvm_run { __u64 trans_exc_code; __u32 pgm_code; } s390_ucontrol; - /* KVM_EXIT_DCR */ + /* KVM_EXIT_DCR (deprecated) */ struct { __u32 dcrn; __u32 data; -- cgit v0.10.2 From 5a484c7c1efd2c45f8cc726e4d21283a5324e361 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 30 Jul 2014 15:03:56 +0530 Subject: KVM: PPC: BOOKEHV: rename e500hv_spr to bookehv_spr This are not specific to e500hv but applicable for bookehv (As per comment from Scott Wood on my patch "kvm: 
ppc: bookehv: Added wrapper macros for shadow registers") Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 8e36c1e..fb86a22 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -539,16 +539,16 @@ static inline bool kvmppc_shared_big_endian(struct kvm_vcpu *vcpu) #endif } -#define SPRNG_WRAPPER_GET(reg, e500hv_spr) \ +#define SPRNG_WRAPPER_GET(reg, bookehv_spr) \ static inline ulong kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ { \ - return mfspr(e500hv_spr); \ + return mfspr(bookehv_spr); \ } \ -#define SPRNG_WRAPPER_SET(reg, e500hv_spr) \ +#define SPRNG_WRAPPER_SET(reg, bookehv_spr) \ static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, ulong val) \ { \ - mtspr(e500hv_spr, val); \ + mtspr(bookehv_spr, val); \ } \ #define SHARED_WRAPPER_GET(reg, size) \ @@ -573,18 +573,18 @@ static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val) \ SHARED_WRAPPER_GET(reg, size) \ SHARED_WRAPPER_SET(reg, size) \ -#define SPRNG_WRAPPER(reg, e500hv_spr) \ - SPRNG_WRAPPER_GET(reg, e500hv_spr) \ - SPRNG_WRAPPER_SET(reg, e500hv_spr) \ +#define SPRNG_WRAPPER(reg, bookehv_spr) \ + SPRNG_WRAPPER_GET(reg, bookehv_spr) \ + SPRNG_WRAPPER_SET(reg, bookehv_spr) \ #ifdef CONFIG_KVM_BOOKE_HV -#define SHARED_SPRNG_WRAPPER(reg, size, e500hv_spr) \ - SPRNG_WRAPPER(reg, e500hv_spr) \ +#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr) \ + SPRNG_WRAPPER(reg, bookehv_spr) \ #else -#define SHARED_SPRNG_WRAPPER(reg, size, e500hv_spr) \ +#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr) \ SHARED_WRAPPER(reg, size) \ #endif -- cgit v0.10.2 From 29577fc00ba40a89fc824f030bcc80c583259346 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 30 Jul 2014 15:25:48 +0200 Subject: KVM: PPC: HV: Remove generic instruction emulation Now that we have properly split load/store instruction emulation and generic instruction emulation, we can move the generic one from kvm.ko to kvm-pr.ko on book3s_64. This reduces the attack surface and amount of code loaded on HV KVM kernels. 
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 1ccd7a1..2d590de 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -48,6 +48,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \ kvm-pr-y := \ fpu.o \ + emulate.o \ book3s_paired_singles.o \ book3s_pr.o \ book3s_pr_papr.o \ @@ -91,7 +92,6 @@ kvm-book3s_64-module-objs += \ $(KVM)/kvm_main.o \ $(KVM)/eventfd.o \ powerpc.o \ - emulate.o \ emulate_loadstore.o \ book3s.o \ book3s_64_vio.o \ diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h index e1357cd..a674f09 100644 --- a/arch/powerpc/kvm/trace_pr.h +++ b/arch/powerpc/kvm/trace_pr.h @@ -291,6 +291,26 @@ TRACE_EVENT(kvm_unmap_hva, TP_printk("unmap hva 0x%lx\n", __entry->hva) ); +TRACE_EVENT(kvm_ppc_instr, + TP_PROTO(unsigned int inst, unsigned long _pc, unsigned int emulate), + TP_ARGS(inst, _pc, emulate), + + TP_STRUCT__entry( + __field( unsigned int, inst ) + __field( unsigned long, pc ) + __field( unsigned int, emulate ) + ), + + TP_fast_assign( + __entry->inst = inst; + __entry->pc = _pc; + __entry->emulate = emulate; + ), + + TP_printk("inst %u pc 0x%lx emulate %u\n", + __entry->inst, __entry->pc, __entry->emulate) +); + #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ -- cgit v0.10.2 From 8e6afa36e754be84b468d7df9e5aa71cf4003f3b Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 31 Jul 2014 10:21:59 +0200 Subject: KVM: PPC: PR: Handle FSCR feature deselects We handle FSCR feature bits (well, TAR only really today) lazily when the guest starts using them. So when a guest activates the bit and later uses that feature we enable it for real in hardware. However, when the guest stops using that bit we don't stop setting it in hardware. That means we can potentially lose a trap that the guest expects to happen because it thinks a feature is not active. This patch adds support to drop TAR when then guest turns it off in FSCR. While at it it also restricts FSCR access to 64bit systems - 32bit ones don't have it. 
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 6166791..6acf0c2 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -182,6 +182,7 @@ extern long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long *map); extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask); +extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr); extern void kvmppc_entry_trampoline(void); extern void kvmppc_hv_entry_trampoline(void); diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 84fddcd..5a2bc4b 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -449,10 +449,10 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_GQR7: to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val; break; +#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_FSCR: - vcpu->arch.fscr = spr_val; + kvmppc_set_fscr(vcpu, spr_val); break; -#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_BESCR: vcpu->arch.bescr = spr_val; break; @@ -593,10 +593,10 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val case SPRN_GQR7: *spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]; break; +#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_FSCR: *spr_val = vcpu->arch.fscr; break; -#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_BESCR: *spr_val = vcpu->arch.bescr; break; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e7a1fa2..faffb27 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -871,6 +871,15 @@ static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac) return RESUME_GUEST; } + +void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr) +{ + if ((vcpu->arch.fscr & FSCR_TAR) && !(fscr & FSCR_TAR)) { + /* TAR got dropped, drop it in shadow too */ + kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); + } + vcpu->arch.fscr = fscr; +} #endif int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, -- cgit v0.10.2 From f4c321eb268e932786c112e0b902ee942d91a336 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 31 Jul 2014 14:16:39 +0100 Subject: arm64: KVM: GICv3: move system register access to msr_s/mrs_s Commit 72c583951526 (arm64: gicv3: Allow GICv3 compilation with older binutils) changed the way we express the GICv3 system registers, but couldn't change the occurences used by KVM as the code wasn't merged yet. Just fix the accessors. 
Cc: Will Deacon Cc: Catalin Marinas Cc: Christoffer Dall Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S index 21e68f6..d160469 100644 --- a/arch/arm64/kvm/vgic-v3-switch.S +++ b/arch/arm64/kvm/vgic-v3-switch.S @@ -48,11 +48,11 @@ dsb st // Save all interesting registers - mrs x4, ICH_HCR_EL2 - mrs x5, ICH_VMCR_EL2 - mrs x6, ICH_MISR_EL2 - mrs x7, ICH_EISR_EL2 - mrs x8, ICH_ELSR_EL2 + mrs_s x4, ICH_HCR_EL2 + mrs_s x5, ICH_VMCR_EL2 + mrs_s x6, ICH_MISR_EL2 + mrs_s x7, ICH_EISR_EL2 + mrs_s x8, ICH_ELSR_EL2 str w4, [x3, #VGIC_V3_CPU_HCR] str w5, [x3, #VGIC_V3_CPU_VMCR] @@ -60,9 +60,9 @@ str w7, [x3, #VGIC_V3_CPU_EISR] str w8, [x3, #VGIC_V3_CPU_ELRSR] - msr ICH_HCR_EL2, xzr + msr_s ICH_HCR_EL2, xzr - mrs x21, ICH_VTR_EL2 + mrs_s x21, ICH_VTR_EL2 mvn w22, w21 ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 @@ -71,22 +71,22 @@ br x24 1: - mrs x20, ICH_LR15_EL2 - mrs x19, ICH_LR14_EL2 - mrs x18, ICH_LR13_EL2 - mrs x17, ICH_LR12_EL2 - mrs x16, ICH_LR11_EL2 - mrs x15, ICH_LR10_EL2 - mrs x14, ICH_LR9_EL2 - mrs x13, ICH_LR8_EL2 - mrs x12, ICH_LR7_EL2 - mrs x11, ICH_LR6_EL2 - mrs x10, ICH_LR5_EL2 - mrs x9, ICH_LR4_EL2 - mrs x8, ICH_LR3_EL2 - mrs x7, ICH_LR2_EL2 - mrs x6, ICH_LR1_EL2 - mrs x5, ICH_LR0_EL2 + mrs_s x20, ICH_LR15_EL2 + mrs_s x19, ICH_LR14_EL2 + mrs_s x18, ICH_LR13_EL2 + mrs_s x17, ICH_LR12_EL2 + mrs_s x16, ICH_LR11_EL2 + mrs_s x15, ICH_LR10_EL2 + mrs_s x14, ICH_LR9_EL2 + mrs_s x13, ICH_LR8_EL2 + mrs_s x12, ICH_LR7_EL2 + mrs_s x11, ICH_LR6_EL2 + mrs_s x10, ICH_LR5_EL2 + mrs_s x9, ICH_LR4_EL2 + mrs_s x8, ICH_LR3_EL2 + mrs_s x7, ICH_LR2_EL2 + mrs_s x6, ICH_LR1_EL2 + mrs_s x5, ICH_LR0_EL2 adr x24, 1f add x24, x24, x23 @@ -113,34 +113,34 @@ tbnz w21, #29, 6f // 6 bits tbz w21, #30, 5f // 5 bits // 7 bits - mrs x20, ICH_AP0R3_EL2 + mrs_s x20, ICH_AP0R3_EL2 str w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] - mrs x19, ICH_AP0R2_EL2 + mrs_s x19, ICH_AP0R2_EL2 str w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] -6: mrs x18, ICH_AP0R1_EL2 +6: mrs_s x18, ICH_AP0R1_EL2 str w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] -5: mrs x17, ICH_AP0R0_EL2 +5: mrs_s x17, ICH_AP0R0_EL2 str w17, [x3, #VGIC_V3_CPU_AP0R] tbnz w21, #29, 6f // 6 bits tbz w21, #30, 5f // 5 bits // 7 bits - mrs x20, ICH_AP1R3_EL2 + mrs_s x20, ICH_AP1R3_EL2 str w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] - mrs x19, ICH_AP1R2_EL2 + mrs_s x19, ICH_AP1R2_EL2 str w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] -6: mrs x18, ICH_AP1R1_EL2 +6: mrs_s x18, ICH_AP1R1_EL2 str w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] -5: mrs x17, ICH_AP1R0_EL2 +5: mrs_s x17, ICH_AP1R0_EL2 str w17, [x3, #VGIC_V3_CPU_AP1R] // Restore SRE_EL1 access and re-enable SRE at EL1. - mrs x5, ICC_SRE_EL2 + mrs_s x5, ICC_SRE_EL2 orr x5, x5, #ICC_SRE_EL2_ENABLE - msr ICC_SRE_EL2, x5 + msr_s ICC_SRE_EL2, x5 isb mov x5, #1 - msr ICC_SRE_EL1, x5 + msr_s ICC_SRE_EL1, x5 .endm /* @@ -150,7 +150,7 @@ .macro restore_vgic_v3_state // Disable SRE_EL1 access. Necessary, otherwise // ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens... 
- msr ICC_SRE_EL1, xzr + msr_s ICC_SRE_EL1, xzr isb // Compute the address of struct vgic_cpu @@ -160,34 +160,34 @@ ldr w4, [x3, #VGIC_V3_CPU_HCR] ldr w5, [x3, #VGIC_V3_CPU_VMCR] - msr ICH_HCR_EL2, x4 - msr ICH_VMCR_EL2, x5 + msr_s ICH_HCR_EL2, x4 + msr_s ICH_VMCR_EL2, x5 - mrs x21, ICH_VTR_EL2 + mrs_s x21, ICH_VTR_EL2 tbnz w21, #29, 6f // 6 bits tbz w21, #30, 5f // 5 bits // 7 bits ldr w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] - msr ICH_AP1R3_EL2, x20 + msr_s ICH_AP1R3_EL2, x20 ldr w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] - msr ICH_AP1R2_EL2, x19 + msr_s ICH_AP1R2_EL2, x19 6: ldr w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] - msr ICH_AP1R1_EL2, x18 + msr_s ICH_AP1R1_EL2, x18 5: ldr w17, [x3, #VGIC_V3_CPU_AP1R] - msr ICH_AP1R0_EL2, x17 + msr_s ICH_AP1R0_EL2, x17 tbnz w21, #29, 6f // 6 bits tbz w21, #30, 5f // 5 bits // 7 bits ldr w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] - msr ICH_AP0R3_EL2, x20 + msr_s ICH_AP0R3_EL2, x20 ldr w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] - msr ICH_AP0R2_EL2, x19 + msr_s ICH_AP0R2_EL2, x19 6: ldr w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] - msr ICH_AP0R1_EL2, x18 + msr_s ICH_AP0R1_EL2, x18 5: ldr w17, [x3, #VGIC_V3_CPU_AP0R] - msr ICH_AP0R0_EL2, x17 + msr_s ICH_AP0R0_EL2, x17 and w22, w21, #0xf mvn w22, w21 @@ -220,22 +220,22 @@ br x24 1: - msr ICH_LR15_EL2, x20 - msr ICH_LR14_EL2, x19 - msr ICH_LR13_EL2, x18 - msr ICH_LR12_EL2, x17 - msr ICH_LR11_EL2, x16 - msr ICH_LR10_EL2, x15 - msr ICH_LR9_EL2, x14 - msr ICH_LR8_EL2, x13 - msr ICH_LR7_EL2, x12 - msr ICH_LR6_EL2, x11 - msr ICH_LR5_EL2, x10 - msr ICH_LR4_EL2, x9 - msr ICH_LR3_EL2, x8 - msr ICH_LR2_EL2, x7 - msr ICH_LR1_EL2, x6 - msr ICH_LR0_EL2, x5 + msr_s ICH_LR15_EL2, x20 + msr_s ICH_LR14_EL2, x19 + msr_s ICH_LR13_EL2, x18 + msr_s ICH_LR12_EL2, x17 + msr_s ICH_LR11_EL2, x16 + msr_s ICH_LR10_EL2, x15 + msr_s ICH_LR9_EL2, x14 + msr_s ICH_LR8_EL2, x13 + msr_s ICH_LR7_EL2, x12 + msr_s ICH_LR6_EL2, x11 + msr_s ICH_LR5_EL2, x10 + msr_s ICH_LR4_EL2, x9 + msr_s ICH_LR3_EL2, x8 + msr_s ICH_LR2_EL2, x7 + msr_s ICH_LR1_EL2, x6 + msr_s ICH_LR0_EL2, x5 // Ensure that the above will have reached the // (re)distributors. This ensure the guest will read @@ -244,9 +244,9 @@ dsb sy // Prevent the guest from touching the GIC system registers - mrs x5, ICC_SRE_EL2 + mrs_s x5, ICC_SRE_EL2 and x5, x5, #~ICC_SRE_EL2_ENABLE - msr ICC_SRE_EL2, x5 + msr_s ICC_SRE_EL2, x5 .endm ENTRY(__save_vgic_v3_state) @@ -260,7 +260,7 @@ ENTRY(__restore_vgic_v3_state) ENDPROC(__restore_vgic_v3_state) ENTRY(__vgic_v3_get_ich_vtr_el2) - mrs x0, ICH_VTR_EL2 + mrs_s x0, ICH_VTR_EL2 ret ENDPROC(__vgic_v3_get_ich_vtr_el2) -- cgit v0.10.2 From fb3ec67942e92e5713e05b7691b277d0a0c0575d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 31 Jul 2014 11:42:18 +0100 Subject: KVM: arm64: GICv3: mandate page-aligned GICV region Just like GICv2 was fixed in 63afbe7a0ac1 (kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform), mandate the GICV region to be both aligned on a page boundary and its size to be a multiple of page size. This prevents a guest from being able to poke at regions where we have no idea what is sitting there. 
Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index f01d446..1c2c8ee 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -215,6 +215,22 @@ int vgic_v3_probe(struct device_node *vgic_node, ret = -ENXIO; goto out; } + + if (!PAGE_ALIGNED(vcpu_res.start)) { + kvm_err("GICV physical address 0x%llx not page aligned\n", + (unsigned long long)vcpu_res.start); + ret = -ENXIO; + goto out; + } + + if (!PAGE_ALIGNED(resource_size(&vcpu_res))) { + kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n", + (unsigned long long)resource_size(&vcpu_res), + PAGE_SIZE); + ret = -ENXIO; + goto out; + } + vgic->vcpu_base = vcpu_res.start; vgic->vctrl_base = NULL; vgic->type = VGIC_V3; -- cgit v0.10.2 From dedf97e8ff2c7513b1370e36b56e08b6bd0f0290 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 1 Aug 2014 12:00:36 +0100 Subject: arm64: KVM: fix 64bit CP15 VM access for 32bit guests Commit f0a3eaff71b8 (ARM64: KVM: fix big endian issue in access_vm_reg for 32bit guest) changed the way we handle CP15 VM accesses, so that all 64bit accesses are done via vcpu_sys_reg. This looks like a good idea as it solves indianness issues in an elegant way, except for one small detail: the register index is doesn't refer to the same array! We end up corrupting some random data structure instead. Fix this by reverting to the original code, except for the introduction of a vcpu_cp15_64_high macro that deals with the endianness thing. Tested on Juno with 32bit SMP guests. Cc: Victor Kamensky Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 79812be..e10c45a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -149,9 +149,11 @@ struct kvm_vcpu_arch { #define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)]) #ifdef CONFIG_CPU_BIG_ENDIAN -#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 1)]) +#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r)) +#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r) + 1) #else -#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 0)]) +#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r) + 1) +#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r)) #endif struct kvm_vm_stat { diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index a4fd526..5805e7c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -135,10 +135,13 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, BUG_ON(!p->is_write); val = *vcpu_reg(vcpu, p->Rt); - if (!p->is_aarch32 || !p->is_32bit) + if (!p->is_aarch32) { vcpu_sys_reg(vcpu, r->reg) = val; - else + } else { + if (!p->is_32bit) + vcpu_cp15_64_high(vcpu, r->reg) = val >> 32; vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL; + } return true; } -- cgit v0.10.2 From 478d66862559bade81cb653af52b9aa53bee2c8a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 5 Aug 2014 11:29:07 +0200 Subject: KVM: PPC: drop duplicate tracepoint Commit 29577fc00ba4 ("KVM: PPC: HV: Remove generic instruction emulation") caused a build failure with allyesconfig: arch/powerpc/kvm/kvm-pr.o:(__tracepoints+0xa8): multiple definition of `__tracepoint_kvm_ppc_instr' arch/powerpc/kvm/kvm.o:(__tracepoints+0x1c0): first defined here due to a duplicate definition of the tracepoint in trace.h and trace_pr.h. 
Because the tracepoint is still used by Book3S HV code, and because the PR code does include trace.h, just remove the duplicate definition from trace_pr.h, and export it from kvm.o. Reported-by: Stephen Rothwell Signed-off-by: Paolo Bonzini diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 288b4bb..4c79284 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1347,3 +1347,5 @@ void kvm_arch_exit(void) { } + +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr); diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h index a674f09..e1357cd 100644 --- a/arch/powerpc/kvm/trace_pr.h +++ b/arch/powerpc/kvm/trace_pr.h @@ -291,26 +291,6 @@ TRACE_EVENT(kvm_unmap_hva, TP_printk("unmap hva 0x%lx\n", __entry->hva) ); -TRACE_EVENT(kvm_ppc_instr, - TP_PROTO(unsigned int inst, unsigned long _pc, unsigned int emulate), - TP_ARGS(inst, _pc, emulate), - - TP_STRUCT__entry( - __field( unsigned int, inst ) - __field( unsigned long, pc ) - __field( unsigned int, emulate ) - ), - - TP_fast_assign( - __entry->inst = inst; - __entry->pc = _pc; - __entry->emulate = emulate; - ), - - TP_printk("inst %u pc 0x%lx emulate %u\n", - __entry->inst, __entry->pc, __entry->emulate) -); - #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ -- cgit v0.10.2 From 56f89f3629ffd1a21d38c3d0bea23deac0e284ce Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:09 +1000 Subject: KVM: Don't keep reference to irq routing table in irqfd struct This makes the irqfd code keep a copy of the irq routing table entry for each irqfd, rather than a reference to the copy in the actual irq routing table maintained in kvm/virt/irqchip.c. This will enable us to change the routing table structure in future, or even not have a routing table at all on some platforms. The synchronization that was previously achieved using srcu_dereference on the read side is now achieved using a seqcount_t structure. That ensures that we don't get a halfway-updated copy of the structure if we read it while another thread is updating it. We still use srcu_read_lock/unlock around the read side so that when changing the routing table we can be sure that after calling synchronize_srcu, nothing will be using the old routing. 
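For readers new to seqcounts, here is a minimal sketch of the read/write pattern the patch adopts. The guarded_entry structure and both function names are illustrative; the seqcount primitives are the real kernel ones, and writers are assumed to be serialized by an external lock (irqfds.lock in the patch):

struct guarded_entry {
	struct kvm_kernel_irq_routing_entry entry;
	seqcount_t sc;			/* set up with seqcount_init() */
};

/* writer side: callers already hold a lock that serializes writers */
static void guarded_update(struct guarded_entry *g,
			   const struct kvm_kernel_irq_routing_entry *src)
{
	write_seqcount_begin(&g->sc);
	g->entry = *src;
	write_seqcount_end(&g->sc);
}

/* reader side: retry until the copy was not torn by a concurrent writer */
static struct kvm_kernel_irq_routing_entry guarded_read(struct guarded_entry *g)
{
	struct kvm_kernel_irq_routing_entry copy;
	unsigned seq;

	do {
		seq = read_seqcount_begin(&g->sc);
		copy = g->entry;
	} while (read_seqcount_retry(&g->sc, seq));

	return copy;
}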
Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 20c3af7..bae593a 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "iodev.h" @@ -75,7 +76,8 @@ struct _irqfd { struct kvm *kvm; wait_queue_t wait; /* Update side is protected by irqfds.lock */ - struct kvm_kernel_irq_routing_entry __rcu *irq_entry; + struct kvm_kernel_irq_routing_entry irq_entry; + seqcount_t irq_entry_sc; /* Used for level IRQ fast-path */ int gsi; struct work_struct inject; @@ -223,16 +225,20 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) { struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); unsigned long flags = (unsigned long)key; - struct kvm_kernel_irq_routing_entry *irq; + struct kvm_kernel_irq_routing_entry irq; struct kvm *kvm = irqfd->kvm; + unsigned seq; int idx; if (flags & POLLIN) { idx = srcu_read_lock(&kvm->irq_srcu); - irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu); + do { + seq = read_seqcount_begin(&irqfd->irq_entry_sc); + irq = irqfd->irq_entry; + } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); /* An event has been signaled, inject an interrupt */ - if (irq) - kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, + if (irq.type == KVM_IRQ_ROUTING_MSI) + kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); else schedule_work(&irqfd->inject); @@ -277,18 +283,20 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, { struct kvm_kernel_irq_routing_entry *e; - if (irqfd->gsi >= irq_rt->nr_rt_entries) { - rcu_assign_pointer(irqfd->irq_entry, NULL); - return; - } + write_seqcount_begin(&irqfd->irq_entry_sc); + + irqfd->irq_entry.type = 0; + if (irqfd->gsi >= irq_rt->nr_rt_entries) + goto out; hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) { /* Only fast-path MSI. */ if (e->type == KVM_IRQ_ROUTING_MSI) - rcu_assign_pointer(irqfd->irq_entry, e); - else - rcu_assign_pointer(irqfd->irq_entry, NULL); + irqfd->irq_entry = *e; } + + out: + write_seqcount_end(&irqfd->irq_entry_sc); } static int @@ -310,6 +318,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) INIT_LIST_HEAD(&irqfd->list); INIT_WORK(&irqfd->inject, irqfd_inject); INIT_WORK(&irqfd->shutdown, irqfd_shutdown); + seqcount_init(&irqfd->irq_entry_sc); f = fdget(args->fd); if (!f.file) { @@ -466,14 +475,14 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) { /* - * This rcu_assign_pointer is needed for when + * This clearing of irq_entry.type is needed for when * another thread calls kvm_irq_routing_update before * we flush workqueue below (we synchronize with * kvm_irq_routing_update using irqfds.lock). - * It is paired with synchronize_srcu done by caller - * of that function. 
*/ - rcu_assign_pointer(irqfd->irq_entry, NULL); + write_seqcount_begin(&irqfd->irq_entry_sc); + irqfd->irq_entry.type = 0; + write_seqcount_end(&irqfd->irq_entry_sc); irqfd_deactivate(irqfd); } } -- cgit v0.10.2 From 8ba918d488caded2c4368b0b922eb905fe3bb101 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:10 +1000 Subject: KVM: irqchip: Provide and use accessors for irq routing table This provides accessor functions for the KVM interrupt mappings, in order to reduce the amount of code that accesses the fields of the kvm_irq_routing_table struct, and restrict that code to one file, virt/kvm/irqchip.c. The new functions are kvm_irq_map_gsi(), which maps from a global interrupt number to a set of IRQ routing entries, and kvm_irq_map_chip_pin, which maps from IRQ chip and pin numbers to a global interrupt number. This also moves the update of kvm_irq_routing_table::chip[][] into irqchip.c, out of the various kvm_set_routing_entry implementations. That means that none of the kvm_set_routing_entry implementations need the kvm_irq_routing_table argument anymore, so this removes it. This does not change any locking or data lifetime rules. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index b68d0dc..39b3a8f 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -1826,8 +1826,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return 0; } -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; @@ -1839,7 +1838,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, e->irqchip.pin = ue->u.irqchip.pin; if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) goto out; - rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; break; case KVM_IRQ_ROUTING_MSI: e->set = kvm_set_msi; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 92528a0..f4c819b 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1556,8 +1556,7 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e, return ret; } -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int ret; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5065b95..4956149 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -752,6 +752,11 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask); +int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, + struct kvm_irq_routing_table *irq_rt, int gsi); +int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, + unsigned irqchip, unsigned pin); + int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); @@ -942,8 +947,7 @@ int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, unsigned flags); -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry 
*e, const struct kvm_irq_routing_entry *ue); void kvm_free_irq_routing(struct kvm *kvm); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index bae593a..15fa948 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -282,20 +282,22 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, struct kvm_irq_routing_table *irq_rt) { struct kvm_kernel_irq_routing_entry *e; + struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; + int i, n_entries; + + n_entries = kvm_irq_map_gsi(entries, irq_rt, irqfd->gsi); write_seqcount_begin(&irqfd->irq_entry_sc); irqfd->irq_entry.type = 0; - if (irqfd->gsi >= irq_rt->nr_rt_entries) - goto out; - hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) { + e = entries; + for (i = 0; i < n_entries; ++i, ++e) { /* Only fast-path MSI. */ if (e->type == KVM_IRQ_ROUTING_MSI) irqfd->irq_entry = *e; } - out: write_seqcount_end(&irqfd->irq_entry_sc); } diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index a228ee8..1758445 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -160,6 +160,7 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, */ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) { + struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; struct kvm_kernel_irq_routing_entry *e; int ret = -EINVAL; struct kvm_irq_routing_table *irq_rt; @@ -177,14 +178,13 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) */ idx = srcu_read_lock(&kvm->irq_srcu); irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, &irq_rt->map[irq], link) { - if (likely(e->type == KVM_IRQ_ROUTING_MSI)) - ret = kvm_set_msi_inatomic(e, kvm); - else - ret = -EWOULDBLOCK; - break; - } + if (kvm_irq_map_gsi(entries, irq_rt, irq) > 0) { + e = &entries[0]; + if (likely(e->type == KVM_IRQ_ROUTING_MSI)) + ret = kvm_set_msi_inatomic(e, kvm); + else + ret = -EWOULDBLOCK; + } srcu_read_unlock(&kvm->irq_srcu, idx); return ret; } @@ -272,8 +272,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, srcu_read_unlock(&kvm->irq_srcu, idx); } -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; @@ -304,7 +303,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, e->irqchip.pin = ue->u.irqchip.pin + delta; if (e->irqchip.pin >= max_pin) goto out; - rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; break; case KVM_IRQ_ROUTING_MSI: e->set = kvm_set_msi; diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index b43c275..f4648dd 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -31,13 +31,37 @@ #include #include "irq.h" +int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, + struct kvm_irq_routing_table *irq_rt, int gsi) +{ + struct kvm_kernel_irq_routing_entry *e; + int n = 0; + + if (gsi < irq_rt->nr_rt_entries) { + hlist_for_each_entry(e, &irq_rt->map[gsi], link) { + entries[n] = *e; + ++n; + } + } + + return n; +} + +int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, + unsigned irqchip, unsigned pin) +{ + return irq_rt->chip[irqchip][pin]; +} + bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; idx = srcu_read_lock(&kvm->irq_srcu); - gsi = 
srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -54,13 +78,15 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; trace_kvm_ack_irq(irqchip, pin); idx = srcu_read_lock(&kvm->irq_srcu); - gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -115,8 +141,8 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status) { - struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; - int ret = -1, i = 0, idx; + struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS]; + int ret = -1, i, idx; struct kvm_irq_routing_table *irq_rt; trace_kvm_set_irq(irq, level, irq_source_id); @@ -127,9 +153,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, */ idx = srcu_read_lock(&kvm->irq_srcu); irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, &irq_rt->map[irq], link) - irq_set[i++] = *e; + i = kvm_irq_map_gsi(irq_set, irq_rt, irq); srcu_read_unlock(&kvm->irq_srcu, idx); while(i--) { @@ -171,9 +195,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, e->gsi = ue->gsi; e->type = ue->type; - r = kvm_set_routing_entry(rt, e, ue); + r = kvm_set_routing_entry(e, ue); if (r) goto out; + if (e->type == KVM_IRQ_ROUTING_IRQCHIP) + rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi; hlist_add_head(&e->link, &rt->map[e->gsi]); r = 0; -- cgit v0.10.2 From 9957c86d659a4d5a2bed25ccbd3bfc9c3f25e658 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:11 +1000 Subject: KVM: Move all accesses to kvm::irq_routing into irqchip.c Now that struct _irqfd does not keep a reference to storage pointed to by the irq_routing field of struct kvm, we can move the statement that updates it out from under the irqfds.lock and put it in kvm_set_irq_routing() instead. That means we then have to take a srcu_read_lock on kvm->irq_srcu around the irqfd_update call in kvm_irqfd_assign(), since holding the kvm->irqfds.lock no longer ensures that that the routing can't change. Combined with changing kvm_irq_map_gsi() and kvm_irq_map_chip_pin() to take a struct kvm * argument instead of the pointer to the routing table, this allows us to to move all references to kvm->irq_routing into irqchip.c. That in turn allows us to move the definition of the kvm_irq_routing_table struct into irqchip.c as well. 
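Illustrative only (not part of this patch): with the accessors in place, a consumer that needs the routing entries for a GSI goes through kvm_irq_map_gsi() under srcu_read_lock instead of dereferencing kvm->irq_routing itself. The resolve_gsi() wrapper below is a made-up example; kvm_irq_map_gsi() and the SRCU calls are the real interfaces:

static int resolve_gsi(struct kvm *kvm, int gsi,
		       struct kvm_kernel_irq_routing_entry *entries)
{
	int idx, n;

	/* entries[] must provide room for KVM_NR_IRQCHIPS elements */
	idx = srcu_read_lock(&kvm->irq_srcu);
	n = kvm_irq_map_gsi(kvm, entries, gsi);
	srcu_read_unlock(&kvm->irq_srcu, idx);

	return n;	/* number of entries copied out */
}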
Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4956149..ddd33e1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -325,24 +325,7 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - -struct kvm_irq_routing_table { - int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; - struct kvm_kernel_irq_routing_entry *rt_entries; - u32 nr_rt_entries; - /* - * Array indexed by gsi. Each entry contains list of irq chips - * the gsi is connected to. - */ - struct hlist_head map[0]; -}; - -#else - -struct kvm_irq_routing_table {}; - -#endif +struct kvm_irq_routing_table; #ifndef KVM_PRIVATE_MEM_SLOTS #define KVM_PRIVATE_MEM_SLOTS 0 @@ -401,8 +384,7 @@ struct kvm { struct mutex irq_lock; #ifdef CONFIG_HAVE_KVM_IRQCHIP /* - * Update side is protected by irq_lock and, - * if configured, irqfds.lock. + * Update side is protected by irq_lock. */ struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head mask_notifier_list; @@ -752,10 +734,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask); -int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, - struct kvm_irq_routing_table *irq_rt, int gsi); -int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, - unsigned irqchip, unsigned pin); +int kvm_irq_map_gsi(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *entries, int gsi); +int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin); int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); @@ -967,7 +948,7 @@ int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); #ifdef CONFIG_HAVE_KVM_IRQCHIP int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); -void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *); +void kvm_irq_routing_update(struct kvm *); #else static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) { @@ -989,10 +970,8 @@ static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) static inline void kvm_irqfd_release(struct kvm *kvm) {} #ifdef CONFIG_HAVE_KVM_IRQCHIP -static inline void kvm_irq_routing_update(struct kvm *kvm, - struct kvm_irq_routing_table *irq_rt) +static inline void kvm_irq_routing_update(struct kvm *kvm) { - rcu_assign_pointer(kvm->irq_routing, irq_rt); } #endif diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 15fa948..f0075ff 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -278,14 +278,13 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, } /* Must be called under irqfds.lock */ -static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, - struct kvm_irq_routing_table *irq_rt) +static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd) { struct kvm_kernel_irq_routing_entry *e; struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; int i, n_entries; - n_entries = kvm_irq_map_gsi(entries, irq_rt, irqfd->gsi); + n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi); write_seqcount_begin(&irqfd->irq_entry_sc); @@ -304,12 +303,12 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, static int kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) { - struct kvm_irq_routing_table *irq_rt; struct _irqfd *irqfd, *tmp; struct fd f; struct 
eventfd_ctx *eventfd = NULL, *resamplefd = NULL; int ret; unsigned int events; + int idx; irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); if (!irqfd) @@ -403,9 +402,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) goto fail; } - irq_rt = rcu_dereference_protected(kvm->irq_routing, - lockdep_is_held(&kvm->irqfds.lock)); - irqfd_update(kvm, irqfd, irq_rt); + idx = srcu_read_lock(&kvm->irq_srcu); + irqfd_update(kvm, irqfd); + srcu_read_unlock(&kvm->irq_srcu, idx); list_add_tail(&irqfd->list, &kvm->irqfds.items); @@ -539,20 +538,17 @@ kvm_irqfd_release(struct kvm *kvm) } /* - * Change irq_routing and irqfd. + * Take note of a change in irq routing. * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards. */ -void kvm_irq_routing_update(struct kvm *kvm, - struct kvm_irq_routing_table *irq_rt) +void kvm_irq_routing_update(struct kvm *kvm) { struct _irqfd *irqfd; spin_lock_irq(&kvm->irqfds.lock); - rcu_assign_pointer(kvm->irq_routing, irq_rt); - list_for_each_entry(irqfd, &kvm->irqfds.items, list) - irqfd_update(kvm, irqfd, irq_rt); + irqfd_update(kvm, irqfd); spin_unlock_irq(&kvm->irqfds.lock); } diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 1758445..963b899 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -163,7 +163,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; struct kvm_kernel_irq_routing_entry *e; int ret = -EINVAL; - struct kvm_irq_routing_table *irq_rt; int idx; trace_kvm_set_irq(irq, level, irq_source_id); @@ -177,8 +176,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) * which is limited to 1:1 GSI mapping. */ idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (kvm_irq_map_gsi(entries, irq_rt, irq) > 0) { + if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { e = &entries[0]; if (likely(e->type == KVM_IRQ_ROUTING_MSI)) ret = kvm_set_msi_inatomic(e, kvm); @@ -264,7 +262,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, int idx, gsi; idx = srcu_read_lock(&kvm->irq_srcu); - gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link) if (kimn->irq == gsi) diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index f4648dd..04faac5 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -31,12 +31,26 @@ #include #include "irq.h" -int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, - struct kvm_irq_routing_table *irq_rt, int gsi) +struct kvm_irq_routing_table { + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; + struct kvm_kernel_irq_routing_entry *rt_entries; + u32 nr_rt_entries; + /* + * Array indexed by gsi. Each entry contains list of irq chips + * the gsi is connected to. 
+ */ + struct hlist_head map[0]; +}; + +int kvm_irq_map_gsi(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *entries, int gsi) { + struct kvm_irq_routing_table *irq_rt; struct kvm_kernel_irq_routing_entry *e; int n = 0; + irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, + lockdep_is_held(&kvm->irq_lock)); if (gsi < irq_rt->nr_rt_entries) { hlist_for_each_entry(e, &irq_rt->map[gsi], link) { entries[n] = *e; @@ -47,21 +61,21 @@ int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, return n; } -int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, - unsigned irqchip, unsigned pin) +int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_irq_routing_table *irq_rt; + + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); return irq_rt->chip[irqchip][pin]; } bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) { - struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -78,15 +92,13 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { - struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; trace_kvm_ack_irq(irqchip, pin); idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -143,7 +155,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, { struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS]; int ret = -1, i, idx; - struct kvm_irq_routing_table *irq_rt; trace_kvm_set_irq(irq, level, irq_source_id); @@ -152,8 +163,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, * writes to the unused one. */ idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - i = kvm_irq_map_gsi(irq_set, irq_rt, irq); + i = kvm_irq_map_gsi(kvm, irq_set, irq); srcu_read_unlock(&kvm->irq_srcu, idx); while(i--) { @@ -250,7 +260,8 @@ int kvm_set_irq_routing(struct kvm *kvm, mutex_lock(&kvm->irq_lock); old = kvm->irq_routing; - kvm_irq_routing_update(kvm, new); + rcu_assign_pointer(kvm->irq_routing, new); + kvm_irq_routing_update(kvm); mutex_unlock(&kvm->irq_lock); synchronize_srcu_expedited(&kvm->irq_srcu); -- cgit v0.10.2 From e4d57e1ee1ab59f0cef0272800ac6c52e0ec814a Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:12 +1000 Subject: KVM: Move irq notifier implementation into eventfd.c This moves the functions kvm_irq_has_notifier(), kvm_notify_acked_irq(), kvm_register_irq_ack_notifier() and kvm_unregister_irq_ack_notifier() from irqchip.c to eventfd.c. The reason for doing this is that those functions are used in connection with IRQFDs, which are implemented in eventfd.c. In future we will want to use IRQFDs on platforms that don't implement the GSI routing implemented in irqchip.c, so we won't be compiling in irqchip.c, but we still need the irq notifiers. The implementation is unchanged. 
Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index f0075ff..99957df 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -34,7 +34,9 @@ #include #include #include +#include +#include "irq.h" #include "iodev.h" #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING @@ -865,3 +867,64 @@ kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return kvm_assign_ioeventfd(kvm, args); } + +bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) { + srcu_read_unlock(&kvm->irq_srcu, idx); + return true; + } + + srcu_read_unlock(&kvm->irq_srcu, idx); + + return false; +} +EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + trace_kvm_ack_irq(irqchip, pin); + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); + srcu_read_unlock(&kvm->irq_srcu, idx); +} + +void kvm_register_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); + mutex_unlock(&kvm->irq_lock); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} + +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_del_init_rcu(&kian->link); + mutex_unlock(&kvm->irq_lock); + synchronize_srcu(&kvm->irq_srcu); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 04faac5..7f256f3 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -69,67 +69,6 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) return irq_rt->chip[irqchip][pin]; } -bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) { - srcu_read_unlock(&kvm->irq_srcu, idx); - return true; - } - - srcu_read_unlock(&kvm->irq_srcu, idx); - - return false; -} -EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); - -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - trace_kvm_ack_irq(irqchip, pin); - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) - kian->irq_acked(kian); - srcu_read_unlock(&kvm->irq_srcu, idx); -} - -void kvm_register_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); - mutex_unlock(&kvm->irq_lock); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} - -void 
kvm_unregister_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_del_init_rcu(&kian->link); - mutex_unlock(&kvm->irq_lock); - synchronize_srcu(&kvm->irq_srcu); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} - int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) { struct kvm_kernel_irq_routing_entry route; -- cgit v0.10.2 From 297e21053a52f060944e9f0de4c64fad9bcd72fc Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:13 +1000 Subject: KVM: Give IRQFD its own separate enabling Kconfig option Currently, the IRQFD code is conditional on CONFIG_HAVE_KVM_IRQ_ROUTING. So that we can have the IRQFD code compiled in without having the IRQ routing code, this creates a new CONFIG_HAVE_KVM_IRQFD, makes the IRQFD code conditional on it instead of CONFIG_HAVE_KVM_IRQ_ROUTING, and makes all the platforms that currently select HAVE_KVM_IRQ_ROUTING also select HAVE_KVM_IRQFD. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index 990b864..3d50ea9 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig @@ -25,6 +25,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select KVM_APIC_ARCHITECTURE select KVM_MMIO diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 8f104a6..d4741db 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -158,6 +158,7 @@ config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && E500 select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_MSI help diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 10d529a..646db9c 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -26,6 +26,7 @@ config KVM select KVM_ASYNC_PF select KVM_ASYNC_PF_SYNC select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING ---help--- Support hosting paravirtualized guest machines using the SIE diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 287e4c8..f9d16ff 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -27,6 +27,7 @@ config KVM select MMU_NOTIFIER select ANON_INODES select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_EVENTFD select KVM_APIC_ARCHITECTURE diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ddd33e1..8593d2e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -437,7 +437,7 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); int __must_check vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD int kvm_irqfd_init(void); void kvm_irqfd_exit(void); #else @@ -932,20 +932,20 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue); void kvm_free_irq_routing(struct kvm *kvm); -int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); - #else static inline void kvm_free_irq_routing(struct kvm *kvm) {} #endif +int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); + #ifdef CONFIG_HAVE_KVM_EVENTFD void kvm_eventfd_init(struct kvm *kvm); int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); -#ifdef CONFIG_HAVE_KVM_IRQCHIP +#ifdef CONFIG_HAVE_KVM_IRQFD int kvm_irqfd(struct kvm *kvm, struct 
kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); void kvm_irq_routing_update(struct kvm *); diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 13f2d19..fc0c5e6 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -6,6 +6,9 @@ config HAVE_KVM config HAVE_KVM_IRQCHIP bool +config HAVE_KVM_IRQFD + bool + config HAVE_KVM_IRQ_ROUTING bool diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 99957df..f5f6154 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -39,7 +39,7 @@ #include "irq.h" #include "iodev.h" -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD /* * -------------------------------------------------------------------- * irqfd: Allows an fd to be used to inject an interrupt to the guest @@ -450,7 +450,7 @@ out: void kvm_eventfd_init(struct kvm *kvm) { -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD spin_lock_init(&kvm->irqfds.lock); INIT_LIST_HEAD(&kvm->irqfds.items); INIT_LIST_HEAD(&kvm->irqfds.resampler_list); @@ -459,7 +459,7 @@ kvm_eventfd_init(struct kvm *kvm) INIT_LIST_HEAD(&kvm->ioeventfds); } -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD /* * shutdown any irqfd's that match fd+gsi */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1b95cc9..a69a623 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2337,7 +2337,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #ifdef CONFIG_HAVE_KVM_MSI case KVM_CAP_SIGNAL_MSI: #endif -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD case KVM_CAP_IRQFD_RESAMPLE: #endif case KVM_CAP_CHECK_EXTENSION_VM: -- cgit v0.10.2 From 25a2150bee00b4d996487552948b9b3ba21d0257 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:14 +1000 Subject: KVM: PPC: Enable IRQFD support for the XICS interrupt controller This makes it possible to use IRQFDs on platforms that use the XICS interrupt controller. To do this we implement kvm_irq_map_gsi() and kvm_irq_map_chip_pin() in book3s_xics.c, so as to provide a 1-1 mapping between global interrupt numbers and XICS interrupt source numbers. For now, all interrupts are mapped as "IRQCHIP" interrupts, and no MSI support is provided. This means that kvm_set_irq can now get called with level == 0 or 1 as well as the powerpc-specific values KVM_INTERRUPT_SET, KVM_INTERRUPT_UNSET and KVM_INTERRUPT_SET_LEVEL. We change ics_deliver_irq() to accept all those values, and remove its report_status argument, as it is always false, given that we don't support KVM_IRQ_LINE_STATUS. This also adds support for interrupt ack notifiers to the XICS code so that the IRQFD resampler functionality can be supported. 
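[Editorial note] With the 1-1 GSI mapping in place, user space can attach an eventfd to a XICS interrupt source the same way it would on x86. Below is a minimal sketch of the KVM_IRQFD wiring, including the resample fd that depends on the ack-notifier support added here; vm_fd is assumed to come from an earlier KVM_CREATE_VM, GUEST_IRQ is an arbitrary illustrative source number, and error handling is trimmed.

/*
 * Sketch: attach an irqfd (with a resample fd) to a guest interrupt.
 * vm_fd is assumed to be a VM descriptor from KVM_CREATE_VM.
 */
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

#define GUEST_IRQ 16	/* illustrative interrupt source number */

int attach_irqfd(int vm_fd)
{
	struct kvm_irqfd irqfd;
	int trigger_fd, resample_fd;

	trigger_fd = eventfd(0, EFD_CLOEXEC);	/* written to inject the irq */
	resample_fd = eventfd(0, EFD_CLOEXEC);	/* signalled on guest EOI */
	if (trigger_fd < 0 || resample_fd < 0)
		return -1;

	memset(&irqfd, 0, sizeof(irqfd));
	irqfd.fd = trigger_fd;
	irqfd.gsi = GUEST_IRQ;
	irqfd.flags = KVM_IRQFD_FLAG_RESAMPLE;
	irqfd.resamplefd = resample_fd;

	if (ioctl(vm_fd, KVM_IRQFD, &irqfd) < 0)
		return -1;

	/*
	 * Writing 1 to trigger_fd now asserts GUEST_IRQ in the guest; the
	 * kernel de-asserts it and signals resample_fd when the guest EOIs,
	 * via the ack notifier hooked up in this patch.
	 */
	return trigger_fd;
}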
Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index d4741db..602eb51 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -170,6 +170,8 @@ config KVM_MPIC config KVM_XICS bool "KVM in-kernel XICS emulation" depends on KVM_BOOK3S_64 && !KVM_MPIC + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD ---help--- Include support for the XICS (eXternal Interrupt Controller Specification) interrupt controller architecture used on diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index b4b0082..3ee38e6 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -401,6 +401,11 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) icp->rm_action |= XICS_RM_REJECT; icp->rm_reject = irq; } + + if (!hlist_empty(&vcpu->kvm->irq_ack_notifier_list)) { + icp->rm_action |= XICS_RM_NOTIFY_EOI; + icp->rm_eoied_irq = irq; + } bail: return check_too_hard(xics, icp); } diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index d1acd32..eaeb780 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -64,8 +64,12 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, u32 new_irq); -static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level, - bool report_status) +/* + * Return value ideally indicates how the interrupt was handled, but no + * callers look at it (given that we don't implement KVM_IRQ_LINE_STATUS), + * so just return 0. + */ +static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level) { struct ics_irq_state *state; struct kvmppc_ics *ics; @@ -82,17 +86,14 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level, if (!state->exists) return -EINVAL; - if (report_status) - return state->asserted; - /* * We set state->asserted locklessly. This should be fine as * we are the only setter, thus concurrent access is undefined * to begin with. 
*/ - if (level == KVM_INTERRUPT_SET_LEVEL) + if (level == 1 || level == KVM_INTERRUPT_SET_LEVEL) state->asserted = 1; - else if (level == KVM_INTERRUPT_UNSET) { + else if (level == 0 || level == KVM_INTERRUPT_UNSET) { state->asserted = 0; return 0; } @@ -100,7 +101,7 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level, /* Attempt delivery */ icp_deliver_irq(xics, NULL, irq); - return state->asserted; + return 0; } static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics, @@ -772,6 +773,8 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) if (state->asserted) icp_deliver_irq(xics, icp, irq); + kvm_notify_acked_irq(vcpu->kvm, 0, irq); + return H_SUCCESS; } @@ -789,6 +792,8 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) icp_check_resend(xics, icp); if (icp->rm_action & XICS_RM_REJECT) icp_deliver_irq(xics, icp, icp->rm_reject); + if (icp->rm_action & XICS_RM_NOTIFY_EOI) + kvm_notify_acked_irq(vcpu->kvm, 0, icp->rm_eoied_irq); icp->rm_action = 0; @@ -1170,7 +1175,16 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, { struct kvmppc_xics *xics = kvm->arch.xics; - return ics_deliver_irq(xics, irq, level, line_status); + return ics_deliver_irq(xics, irq, level); +} + +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, + int irq_source_id, int level, bool line_status) +{ + if (!level) + return -1; + return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi, + level, line_status); } static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) @@ -1301,3 +1315,26 @@ void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) vcpu->arch.icp = NULL; vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; } + +static int xics_set_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + return kvm_set_irq(kvm, irq_source_id, e->gsi, level, line_status); +} + +int kvm_irq_map_gsi(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *entries, int gsi) +{ + entries->gsi = gsi; + entries->type = KVM_IRQ_ROUTING_IRQCHIP; + entries->set = xics_set_irq; + entries->irqchip.irqchip = 0; + entries->irqchip.pin = gsi; + return 1; +} + +int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + return pin; +} diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h index dd9326c..e8aaa7a 100644 --- a/arch/powerpc/kvm/book3s_xics.h +++ b/arch/powerpc/kvm/book3s_xics.h @@ -71,9 +71,11 @@ struct kvmppc_icp { #define XICS_RM_KICK_VCPU 0x1 #define XICS_RM_CHECK_RESEND 0x2 #define XICS_RM_REJECT 0x4 +#define XICS_RM_NOTIFY_EOI 0x8 u32 rm_action; struct kvm_vcpu *rm_kick_target; u32 rm_reject; + u32 rm_eoied_irq; /* Debug stuff for real mode */ union kvmppc_icp_state rm_dbgstate; -- cgit v0.10.2 From f3380ca5d7edb5e31932998ab2e29dfdce39c5ed Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 5 Aug 2014 12:42:23 +0800 Subject: KVM: nVMX: Fix nested vmexit ack intr before load vmcs01 An external interrupt will cause a vmexit with reason "external interrupt" when L2 is running. L1 will pick up the interrupt through vmcs12 if L1 set the ack interrupt bit. Commit 77b0f5d (KVM: nVMX: Ack and write vector info to intr_info if L1 asks us to) retrieves the interrupt that belongs to L1 before vmcs01 is loaded. This will lead to problems in the next patch, which would write to SVI of vmcs02 instead of vmcs01 (SVI of vmcs02 doesn't make sense because L2 runs without APICv). 
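[Editorial note] The ordering issue described above is self-contained enough to model outside the kernel: the acknowledgement writes into whichever VMCS is currently loaded, so the switch back to vmcs01 has to happen first. The toy program below exists only to make that visible; its structs and function names are invented and bear no relation to the real vmx.c code beyond the ordering they demonstrate.

/*
 * Toy model of the ordering bug: acking an interrupt records state in
 * whatever "VMCS" is loaded at that moment, so loading vmcs01 must
 * precede the ack.  Illustration only, not KVM code.
 */
#include <stdio.h>

struct toy_vmcs {
	const char *name;
	int svi;		/* stand-in for the state written on ack */
};

static struct toy_vmcs vmcs01 = { "vmcs01", -1 };
static struct toy_vmcs vmcs02 = { "vmcs02", -1 };
static struct toy_vmcs *loaded = &vmcs02;	/* L2 was running */

static void load_vmcs01(void)
{
	loaded = &vmcs01;
}

static void ack_interrupt(int vector)
{
	/* the ack updates whichever VMCS happens to be loaded */
	loaded->svi = vector;
	printf("vector 0x%x recorded in %s\n", vector, loaded->name);
}

int main(void)
{
	/* buggy order: the vector lands in vmcs02, which L1 never reads */
	ack_interrupt(0x31);
	load_vmcs01();

	/* fixed order: switch to vmcs01 first, then ack */
	loaded = &vmcs02;
	load_vmcs01();
	ack_interrupt(0x31);
	return 0;
}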
Reviewed-by: Paolo Bonzini Tested-by: Liu, RongrongX Tested-by: Felipe Reyes Fixes: 77b0f5d67ff2781f36831cba79674c3e97bd7acf Cc: stable@vger.kernel.org Signed-off-by: Wanpeng Li [Move tracepoint as well. - Paolo] Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e618f34..bfe11cf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8754,6 +8754,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, exit_qualification); + vmx_load_vmcs01(vcpu); + if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) && nested_exit_intr_ack_set(vcpu)) { int irq = kvm_cpu_get_interrupt(vcpu); @@ -8769,8 +8771,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmcs12->vm_exit_intr_error_code, KVM_ISA_VMX); - vmx_load_vmcs01(vcpu); - vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS)); vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS)); vmx_segment_cache_clear(vmx); -- cgit v0.10.2 From 56cc2406d68c0f09505c389e276f27a99f495cbd Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 5 Aug 2014 12:42:24 +0800 Subject: KVM: nVMX: fix "acknowledge interrupt on exit" when APICv is in use After commit 77b0f5d (KVM: nVMX: Ack and write vector info to intr_info if L1 asks us to), "Acknowledge interrupt on exit" behavior can be emulated. To do so, KVM will ask the APIC for the interrupt vector if during a nested vmexit if VM_EXIT_ACK_INTR_ON_EXIT is set. With APICv, kvm_get_apic_interrupt would return -1 and give the following WARNING: Call Trace: [] dump_stack+0x49/0x5e [] warn_slowpath_common+0x7c/0x96 [] ? nested_vmx_vmexit+0xa4/0x233 [kvm_intel] [] warn_slowpath_null+0x15/0x17 [] nested_vmx_vmexit+0xa4/0x233 [kvm_intel] [] ? nested_vmx_exit_handled+0x6a/0x39e [kvm_intel] [] ? kvm_apic_has_interrupt+0x80/0xd5 [kvm] [] vmx_check_nested_events+0xc3/0xd3 [kvm_intel] [] inject_pending_event+0xd0/0x16e [kvm] [] vcpu_enter_guest+0x319/0x704 [kvm] To fix this, we cannot rely on the processor's virtual interrupt delivery, because "acknowledge interrupt on exit" must only update the virtual ISR/PPR/IRR registers (and SVI, which is just a cache of the virtual ISR) but it should not deliver the interrupt through the IDT. Thus, KVM has to deliver the interrupt "by hand", similar to the treatment of EOI in commit fc57ac2c9ca8 (KVM: lapic: sync highest ISR to hardware apic on EOI, 2014-05-14). The patch modifies kvm_cpu_get_interrupt to always acknowledge an interrupt; there are only two callers, and the other is not affected because it is never reached with kvm_apic_vid_enabled() == true. Then it modifies apic_set_isr and apic_clear_irr to update SVI and RVI in addition to the registers. 
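[Editorial note] The apic_set_isr()/apic_clear_irr() changes above revolve around 256-entry vector bitmaps and a "highest set vector" rule, which is what makes caching the highest in-service vector (and, with APICv, pushing it into SVI) valid. The stand-alone model below shows just that bookkeeping; struct toy_apic and its helpers are illustrative and do not reflect the kernel's actual data layout.

/*
 * Simplified model of IRR/ISR bookkeeping: 256 vectors in a bitmap,
 * with "highest set vector" lookups of the kind the APIC code performs.
 */
#include <stdint.h>
#include <stdio.h>

#define NR_VECTORS 256

struct toy_apic {
	uint32_t irr[NR_VECTORS / 32];	/* interrupt request register */
	uint32_t isr[NR_VECTORS / 32];	/* in-service register */
	int highest_isr_cache;		/* analogue of the SVI cache */
};

static void set_vector(uint32_t *reg, int vec)
{
	reg[vec / 32] |= 1u << (vec % 32);
}

static void clear_vector(uint32_t *reg, int vec)
{
	reg[vec / 32] &= ~(1u << (vec % 32));
}

static int find_highest(const uint32_t *reg)
{
	for (int word = NR_VECTORS / 32 - 1; word >= 0; word--)
		if (reg[word])
			return word * 32 + 31 - __builtin_clz(reg[word]);
	return -1;
}

/* deliver the highest pending vector: IRR -> ISR, as the ack path does */
static int ack_highest(struct toy_apic *apic)
{
	int vec = find_highest(apic->irr);

	if (vec < 0)
		return -1;
	set_vector(apic->isr, vec);
	apic->highest_isr_cache = vec;	/* only valid while software owns ISR */
	clear_vector(apic->irr, vec);
	return vec;
}

int main(void)
{
	struct toy_apic apic = { .highest_isr_cache = -1 };

	set_vector(apic.irr, 0x31);
	set_vector(apic.irr, 0x80);
	printf("acked vector 0x%x\n", ack_highest(&apic));	/* 0x80 first */
	printf("acked vector 0x%x\n", ack_highest(&apic));	/* then 0x31 */
	return 0;
}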
Suggested-by: Paolo Bonzini Suggested-by: "Zhang, Yang Z" Tested-by: Liu, RongrongX Tested-by: Felipe Reyes Fixes: 77b0f5d67ff2781f36831cba79674c3e97bd7acf Cc: stable@vger.kernel.org Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index bd0da43..a1ec6a50 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -108,7 +108,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) vector = kvm_cpu_get_extint(v); - if (kvm_apic_vid_enabled(v->kvm) || vector != -1) + if (vector != -1) return vector; /* PIC */ return kvm_get_apic_interrupt(v); /* APIC */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3855103..08e8a89 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -352,25 +352,46 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) { - apic->irr_pending = false; + struct kvm_vcpu *vcpu; + + vcpu = apic->vcpu; + apic_clear_vector(vec, apic->regs + APIC_IRR); - if (apic_search_irr(apic) != -1) - apic->irr_pending = true; + if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) + /* try to update RVI */ + kvm_make_request(KVM_REQ_EVENT, vcpu); + else { + vec = apic_search_irr(apic); + apic->irr_pending = (vec != -1); + } } static inline void apic_set_isr(int vec, struct kvm_lapic *apic) { - /* Note that we never get here with APIC virtualization enabled. */ + struct kvm_vcpu *vcpu; + + if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR)) + return; + + vcpu = apic->vcpu; - if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR)) - ++apic->isr_count; - BUG_ON(apic->isr_count > MAX_APIC_VECTOR); /* - * ISR (in service register) bit is set when injecting an interrupt. - * The highest vector is injected. Thus the latest bit set matches - * the highest bit in ISR. + * With APIC virtualization enabled, all caching is disabled + * because the processor can modify ISR under the hood. Instead + * just set SVI. */ - apic->highest_isr_cache = vec; + if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) + kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec); + else { + ++apic->isr_count; + BUG_ON(apic->isr_count > MAX_APIC_VECTOR); + /* + * ISR (in service register) bit is set when injecting an interrupt. + * The highest vector is injected. Thus the latest bit set matches + * the highest bit in ISR. + */ + apic->highest_isr_cache = vec; + } } static inline int apic_find_highest_isr(struct kvm_lapic *apic) @@ -1627,11 +1648,16 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) int vector = kvm_apic_has_interrupt(vcpu); struct kvm_lapic *apic = vcpu->arch.apic; - /* Note that we never get here with APIC virtualization enabled. */ - if (vector == -1) return -1; + /* + * We get here even with APIC virtualization enabled, if doing + * nested virtualization and L1 runs with the "acknowledge interrupt + * on exit" mode. Then we cannot inject the interrupt via RVI, + * because the process would deliver it through the IDT. + */ + apic_set_isr(vector, apic); apic_update_ppr(apic); apic_clear_irr(vector, apic); -- cgit v0.10.2 From c77dcacb397519b6ade8f08201a4a90a7f4f751e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 6 Aug 2014 14:24:45 +0200 Subject: KVM: Move more code under CONFIG_HAVE_KVM_IRQFD Commits e4d57e1ee1ab (KVM: Move irq notifier implementation into eventfd.c, 2014-06-30) included the irq notifier code unconditionally in eventfd.c, while it was under CONFIG_HAVE_KVM_IRQCHIP before. 
Similarly, commit 297e21053a52 (KVM: Give IRQFD its own separate enabling Kconfig option, 2014-06-30) moved code from CONFIG_HAVE_IRQ_ROUTING to CONFIG_HAVE_KVM_IRQFD but forgot to move the pieces that used to be under CONFIG_HAVE_KVM_IRQCHIP. Together, this broke compilation without CONFIG_KVM_XICS. Fix by adding or changing the #ifdefs so that they point at CONFIG_HAVE_KVM_IRQFD. Signed-off-by: Paolo Bonzini diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8593d2e..a4c33b3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -388,6 +388,8 @@ struct kvm { */ struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head mask_notifier_list; +#endif +#ifdef CONFIG_HAVE_KVM_IRQFD struct hlist_head irq_ack_notifier_list; #endif diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 131a0bd..908925a 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -37,7 +37,7 @@ TRACE_EVENT(kvm_userspace_exit, __entry->errno < 0 ? -__entry->errno : __entry->reason) ); -#if defined(CONFIG_HAVE_KVM_IRQCHIP) +#if defined(CONFIG_HAVE_KVM_IRQFD) TRACE_EVENT(kvm_set_irq, TP_PROTO(unsigned int gsi, int level, int irq_source_id), TP_ARGS(gsi, level, irq_source_id), @@ -57,7 +57,7 @@ TRACE_EVENT(kvm_set_irq, TP_printk("gsi %u level %d source %d", __entry->gsi, __entry->level, __entry->irq_source_id) ); -#endif +#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */ #if defined(__KVM_HAVE_IOAPIC) #define kvm_deliver_mode \ @@ -124,7 +124,7 @@ TRACE_EVENT(kvm_msi_set_irq, #endif /* defined(__KVM_HAVE_IOAPIC) */ -#if defined(CONFIG_HAVE_KVM_IRQCHIP) +#if defined(CONFIG_HAVE_KVM_IRQFD) TRACE_EVENT(kvm_ack_irq, TP_PROTO(unsigned int irqchip, unsigned int pin), @@ -149,7 +149,7 @@ TRACE_EVENT(kvm_ack_irq, #endif ); -#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */ +#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */ diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index f5f6154..3c5981c 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -445,6 +445,67 @@ out: kfree(irqfd); return ret; } + +bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) { + srcu_read_unlock(&kvm->irq_srcu, idx); + return true; + } + + srcu_read_unlock(&kvm->irq_srcu, idx); + + return false; +} +EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + trace_kvm_ack_irq(irqchip, pin); + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); + srcu_read_unlock(&kvm->irq_srcu, idx); +} + +void kvm_register_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); + mutex_unlock(&kvm->irq_lock); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} + +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_del_init_rcu(&kian->link); + mutex_unlock(&kvm->irq_lock); + synchronize_srcu(&kvm->irq_srcu); +#ifdef 
__KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} #endif void @@ -867,64 +928,3 @@ kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return kvm_assign_ioeventfd(kvm, args); } - -bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) { - srcu_read_unlock(&kvm->irq_srcu, idx); - return true; - } - - srcu_read_unlock(&kvm->irq_srcu, idx); - - return false; -} -EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); - -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - trace_kvm_ack_irq(irqchip, pin); - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) - kian->irq_acked(kian); - srcu_read_unlock(&kvm->irq_srcu, idx); -} - -void kvm_register_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); - mutex_unlock(&kvm->irq_lock); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} - -void kvm_unregister_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_del_init_rcu(&kian->link); - mutex_unlock(&kvm->irq_lock); - synchronize_srcu(&kvm->irq_srcu); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a69a623..33712fb 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -465,6 +465,8 @@ static struct kvm *kvm_create_vm(unsigned long type) #ifdef CONFIG_HAVE_KVM_IRQCHIP INIT_HLIST_HEAD(&kvm->mask_notifier_list); +#endif +#ifdef CONFIG_HAVE_KVM_IRQFD INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); #endif -- cgit v0.10.2
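[Editorial note] Since IRQFD support is now a capability separate from GSI routing, user space should probe for it rather than assume it. A short sketch of doing that with KVM_CHECK_EXTENSION follows; the query is issued on /dev/kvm here, though the KVM_CAP_CHECK_EXTENSION_VM path mentioned in the diff above allows the same check on a VM fd.

/*
 * Sketch: probe for irqfd support before using KVM_IRQFD.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR | O_CLOEXEC);

	if (kvm_fd < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	int has_irqfd = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQFD);
	int has_resample = ioctl(kvm_fd, KVM_CHECK_EXTENSION,
				 KVM_CAP_IRQFD_RESAMPLE);

	printf("KVM_CAP_IRQFD: %s\n", has_irqfd > 0 ? "yes" : "no");
	printf("KVM_CAP_IRQFD_RESAMPLE: %s\n", has_resample > 0 ? "yes" : "no");

	close(kvm_fd);
	return 0;
}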