From 210552c1bfe83122a480673660d5ca9821c944ae Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 5 Mar 2013 03:18:00 +0000 Subject: ARM: KVM: add support for minimal host vs guest profiling In order to be able to correctly profile what is happening on the host, we need to be able to identify when we're running on the guest, and log these events differently. Perf offers a simple way to register callbacks into KVM. Mimic what x86 does and enjoy being able to profile your KVM host. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 0c4e643..78813b8 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -207,4 +207,7 @@ static inline void __cpu_init_hyp_mode(unsigned long long pgd_ptr, kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr); } +int kvm_perf_init(void); +int kvm_perf_teardown(void); + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 8dc5e76..53c5ed8 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -18,6 +18,6 @@ kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o -obj-y += coproc.o coproc_a15.o mmio.o psci.o +obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 1497e18..7403f88 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -920,7 +920,10 @@ static int init_hyp_mode(void) if (err) goto out_free_mappings; + kvm_perf_init(); + kvm_info("Hyp mode initialized successfully\n"); + return 0; out_free_vfp: free_percpu(kvm_host_vfp_state); @@ -964,6 +967,7 @@ out_err: /* NOP: Compiling as a module not supported */ void kvm_arch_exit(void) { + kvm_perf_teardown(); } static int arm_init(void) diff --git a/arch/arm/kvm/perf.c b/arch/arm/kvm/perf.c new file mode 100644 index 0000000..1a3849d --- /dev/null +++ b/arch/arm/kvm/perf.c @@ -0,0 +1,68 @@ +/* + * Based on the x86 implementation. + * + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include + +static int kvm_is_in_guest(void) +{ + return kvm_arm_get_running_vcpu() != NULL; +} + +static int kvm_is_user_mode(void) +{ + struct kvm_vcpu *vcpu; + + vcpu = kvm_arm_get_running_vcpu(); + + if (vcpu) + return !vcpu_mode_priv(vcpu); + + return 0; +} + +static unsigned long kvm_get_guest_ip(void) +{ + struct kvm_vcpu *vcpu; + + vcpu = kvm_arm_get_running_vcpu(); + + if (vcpu) + return *vcpu_pc(vcpu); + + return 0; +} + +static struct perf_guest_info_callbacks kvm_guest_cbs = { + .is_in_guest = kvm_is_in_guest, + .is_user_mode = kvm_is_user_mode, + .get_guest_ip = kvm_get_guest_ip, +}; + +int kvm_perf_init(void) +{ + return perf_register_guest_info_callbacks(&kvm_guest_cbs); +} + +int kvm_perf_teardown(void) +{ + return perf_unregister_guest_info_callbacks(&kvm_guest_cbs); +} -- cgit v0.10.2 From 372b7c1bc80510225ca91cba75bc0850a6e16c39 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 27 Mar 2013 15:56:11 +0000 Subject: ARM: KVM: arch_timer: use symbolic constants In clocksource/arm_arch_timer.h we define useful symbolic constants. Let's use them to make the KVM arch_timer code clearer. Signed-off-by: Mark Rutland Acked-by: Marc Zyngier Cc: Christoffer Dall Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/arch_timer.c b/arch/arm/kvm/arch_timer.c index 6ac938d..c55b608 100644 --- a/arch/arm/kvm/arch_timer.c +++ b/arch/arm/kvm/arch_timer.c @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -64,7 +65,7 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - timer->cntv_ctl |= 1 << 1; /* Mask the interrupt in the guest */ + timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, vcpu->arch.timer_cpu.irq->irq, vcpu->arch.timer_cpu.irq->level); @@ -133,8 +134,8 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) cycle_t cval, now; u64 ns; - /* Check if the timer is enabled and unmasked first */ - if ((timer->cntv_ctl & 3) != 1) + if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || + !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) return; cval = timer->cntv_cval; -- cgit v0.10.2 From 6060df84cbe0d36b8e1415b68e3f67b77f27052a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 12 Apr 2013 19:12:01 +0100 Subject: ARM: KVM: simplify HYP mapping population The way we populate HYP mappings is a bit convoluted, to say the least. Passing a pointer around to keep track of the current PFN is quite odd, and we end-up having two different PTE accessors for no good reason. Simplify the whole thing by unifying the two PTE accessors, passing a pgprot_t around, and moving the various validity checks to the upper layers. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 2f12e40..3003321 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -125,54 +125,34 @@ void free_hyp_pmds(void) } static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, - unsigned long end) + unsigned long end, unsigned long pfn, + pgprot_t prot) { pte_t *pte; unsigned long addr; - struct page *page; - for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { - unsigned long hyp_addr = KERN_TO_HYP(addr); - - pte = pte_offset_kernel(pmd, hyp_addr); - BUG_ON(!virt_addr_valid(addr)); - page = virt_to_page(addr); - kvm_set_pte(pte, mk_pte(page, PAGE_HYP)); - } -} - -static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start, - unsigned long end, - unsigned long *pfn_base) -{ - pte_t *pte; - unsigned long addr; - - for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { - unsigned long hyp_addr = KERN_TO_HYP(addr); - - pte = pte_offset_kernel(pmd, hyp_addr); - BUG_ON(pfn_valid(*pfn_base)); - kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE)); - (*pfn_base)++; + for (addr = start; addr < end; addr += PAGE_SIZE) { + pte = pte_offset_kernel(pmd, addr); + kvm_set_pte(pte, pfn_pte(pfn, prot)); + pfn++; } } static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, - unsigned long end, unsigned long *pfn_base) + unsigned long end, unsigned long pfn, + pgprot_t prot) { pmd_t *pmd; pte_t *pte; unsigned long addr, next; for (addr = start; addr < end; addr = next) { - unsigned long hyp_addr = KERN_TO_HYP(addr); - pmd = pmd_offset(pud, hyp_addr); + pmd = pmd_offset(pud, addr); BUG_ON(pmd_sect(*pmd)); if (pmd_none(*pmd)) { - pte = pte_alloc_one_kernel(NULL, hyp_addr); + pte = pte_alloc_one_kernel(NULL, addr); if (!pte) { kvm_err("Cannot allocate Hyp pte\n"); return -ENOMEM; @@ -182,25 +162,17 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, next = pmd_addr_end(addr, end); - /* - * If pfn_base is NULL, we map kernel pages into HYP with the - * virtual address. Otherwise, this is considered an I/O - * mapping and we map the physical region starting at - * *pfn_base to [start, end[. - */ - if (!pfn_base) - create_hyp_pte_mappings(pmd, addr, next); - else - create_hyp_io_pte_mappings(pmd, addr, next, pfn_base); + create_hyp_pte_mappings(pmd, addr, next, pfn, prot); + pfn += (next - addr) >> PAGE_SHIFT; } return 0; } -static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base) +static int __create_hyp_mappings(pgd_t *pgdp, + unsigned long start, unsigned long end, + unsigned long pfn, pgprot_t prot) { - unsigned long start = (unsigned long)from; - unsigned long end = (unsigned long)to; pgd_t *pgd; pud_t *pud; pmd_t *pmd; @@ -209,21 +181,14 @@ static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base) if (start >= end) return -EINVAL; - /* Check for a valid kernel memory mapping */ - if (!pfn_base && (!virt_addr_valid(from) || !virt_addr_valid(to - 1))) - return -EINVAL; - /* Check for a valid kernel IO mapping */ - if (pfn_base && (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))) - return -EINVAL; mutex_lock(&kvm_hyp_pgd_mutex); - for (addr = start; addr < end; addr = next) { - unsigned long hyp_addr = KERN_TO_HYP(addr); - pgd = hyp_pgd + pgd_index(hyp_addr); - pud = pud_offset(pgd, hyp_addr); + for (addr = start & PAGE_MASK; addr < end; addr = next) { + pgd = pgdp + pgd_index(addr); + pud = pud_offset(pgd, addr); if (pud_none_or_clear_bad(pud)) { - pmd = pmd_alloc_one(NULL, hyp_addr); + pmd = pmd_alloc_one(NULL, addr); if (!pmd) { kvm_err("Cannot allocate Hyp pmd\n"); err = -ENOMEM; @@ -233,9 +198,10 @@ static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base) } next = pgd_addr_end(addr, end); - err = create_hyp_pmd_mappings(pud, addr, next, pfn_base); + err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot); if (err) goto out; + pfn += (next - addr) >> PAGE_SHIFT; } out: mutex_unlock(&kvm_hyp_pgd_mutex); @@ -255,22 +221,38 @@ out: */ int create_hyp_mappings(void *from, void *to) { - return __create_hyp_mappings(from, to, NULL); + unsigned long phys_addr = virt_to_phys(from); + unsigned long start = KERN_TO_HYP((unsigned long)from); + unsigned long end = KERN_TO_HYP((unsigned long)to); + + /* Check for a valid kernel memory mapping */ + if (!virt_addr_valid(from) || !virt_addr_valid(to - 1)) + return -EINVAL; + + return __create_hyp_mappings(hyp_pgd, start, end, + __phys_to_pfn(phys_addr), PAGE_HYP); } /** * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode * @from: The kernel start VA of the range * @to: The kernel end VA of the range (exclusive) - * @addr: The physical start address which gets mapped + * @phys_addr: The physical start address which gets mapped * * The resulting HYP VA is the same as the kernel VA, modulo * HYP_PAGE_OFFSET. */ -int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr) +int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) { - unsigned long pfn = __phys_to_pfn(addr); - return __create_hyp_mappings(from, to, &pfn); + unsigned long start = KERN_TO_HYP((unsigned long)from); + unsigned long end = KERN_TO_HYP((unsigned long)to); + + /* Check for a valid kernel IO mapping */ + if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)) + return -EINVAL; + + return __create_hyp_mappings(hyp_pgd, start, end, + __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); } /** -- cgit v0.10.2 From 3562c76dcb9ce84853c835eec12a911bf3a8e2da Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 12 Apr 2013 19:12:02 +0100 Subject: ARM: KVM: fix HYP mapping limitations around zero The current code for creating HYP mapping doesn't like to wrap around zero, which prevents from mapping anything into the last page of the virtual address space. It doesn't take much effort to remove this limitation, making the code more consistent with the rest of the kernel in the process. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 3003321..96d61da 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -131,11 +131,12 @@ static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, pte_t *pte; unsigned long addr; - for (addr = start; addr < end; addr += PAGE_SIZE) { + addr = start; + do { pte = pte_offset_kernel(pmd, addr); kvm_set_pte(pte, pfn_pte(pfn, prot)); pfn++; - } + } while (addr += PAGE_SIZE, addr != end); } static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, @@ -146,7 +147,8 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, pte_t *pte; unsigned long addr, next; - for (addr = start; addr < end; addr = next) { + addr = start; + do { pmd = pmd_offset(pud, addr); BUG_ON(pmd_sect(*pmd)); @@ -164,7 +166,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, create_hyp_pte_mappings(pmd, addr, next, pfn, prot); pfn += (next - addr) >> PAGE_SHIFT; - } + } while (addr = next, addr != end); return 0; } @@ -179,11 +181,10 @@ static int __create_hyp_mappings(pgd_t *pgdp, unsigned long addr, next; int err = 0; - if (start >= end) - return -EINVAL; - mutex_lock(&kvm_hyp_pgd_mutex); - for (addr = start & PAGE_MASK; addr < end; addr = next) { + addr = start & PAGE_MASK; + end = PAGE_ALIGN(end); + do { pgd = pgdp + pgd_index(addr); pud = pud_offset(pgd, addr); @@ -202,7 +203,7 @@ static int __create_hyp_mappings(pgd_t *pgdp, if (err) goto out; pfn += (next - addr) >> PAGE_SHIFT; - } + } while (addr = next, addr != end); out: mutex_unlock(&kvm_hyp_pgd_mutex); return err; @@ -216,8 +217,6 @@ out: * The same virtual address as the kernel virtual address is also used * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying * physical pages. - * - * Note: Wrapping around zero in the "to" address is not supported. */ int create_hyp_mappings(void *from, void *to) { -- cgit v0.10.2 From 2fb410596ca917fe6419be61ee5bc1782b17d947 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 12 Apr 2013 19:12:03 +0100 Subject: ARM: KVM: move to a KVM provided HYP idmap After the HYP page table rework, it is pretty easy to let the KVM code provide its own idmap, rather than expecting the kernel to provide it. It takes actually less code to do so. Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm/include/asm/idmap.h b/arch/arm/include/asm/idmap.h index 1a66f907..bf863ed 100644 --- a/arch/arm/include/asm/idmap.h +++ b/arch/arm/include/asm/idmap.h @@ -8,7 +8,6 @@ #define __idmap __section(.idmap.text) noinline notrace extern pgd_t *idmap_pgd; -extern pgd_t *hyp_pgd; void setup_mm_for_reboot(void); diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 970f3b5..3c71a1d 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -21,7 +21,6 @@ #include #include -#include /* * We directly use the kernel VA for the HYP, as we can directly share diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 96d61da..bfc5927 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -32,6 +32,7 @@ extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; +static pgd_t *hyp_pgd; static DEFINE_MUTEX(kvm_hyp_pgd_mutex); static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) @@ -715,12 +716,33 @@ phys_addr_t kvm_mmu_get_httbr(void) int kvm_mmu_init(void) { + unsigned long hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start); + unsigned long hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end); + int err; + + hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); if (!hyp_pgd) { kvm_err("Hyp mode PGD not allocated\n"); - return -ENOMEM; + err = -ENOMEM; + goto out; + } + + /* Create the idmap in the boot page tables */ + err = __create_hyp_mappings(boot_hyp_pgd, + hyp_idmap_start, hyp_idmap_end, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP); + + if (err) { + kvm_err("Failed to idmap %lx-%lx\n", + hyp_idmap_start, hyp_idmap_end); + goto out; } return 0; +out: + kfree(hyp_pgd); + return err; } /** diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index 5ee505c..83cb3ac 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -8,7 +8,6 @@ #include #include #include -#include pgd_t *idmap_pgd; @@ -83,37 +82,10 @@ static void identity_mapping_add(pgd_t *pgd, const char *text_start, } while (pgd++, addr = next, addr != end); } -#if defined(CONFIG_ARM_VIRT_EXT) && defined(CONFIG_ARM_LPAE) -pgd_t *hyp_pgd; - -extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; - -static int __init init_static_idmap_hyp(void) -{ - hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); - if (!hyp_pgd) - return -ENOMEM; - - pr_info("Setting up static HYP identity map for 0x%p - 0x%p\n", - __hyp_idmap_text_start, __hyp_idmap_text_end); - identity_mapping_add(hyp_pgd, __hyp_idmap_text_start, - __hyp_idmap_text_end, PMD_SECT_AP1); - - return 0; -} -#else -static int __init init_static_idmap_hyp(void) -{ - return 0; -} -#endif - extern char __idmap_text_start[], __idmap_text_end[]; static int __init init_static_idmap(void) { - int ret; - idmap_pgd = pgd_alloc(&init_mm); if (!idmap_pgd) return -ENOMEM; @@ -123,12 +95,10 @@ static int __init init_static_idmap(void) identity_mapping_add(idmap_pgd, __idmap_text_start, __idmap_text_end, 0); - ret = init_static_idmap_hyp(); - /* Flush L1 for the hardware to see this page table content */ flush_cache_louis(); - return ret; + return 0; } early_initcall(init_static_idmap); -- cgit v0.10.2 From 0394e1f605208706e4e1999d06a4570b9f583b7f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 12 Apr 2013 19:12:04 +0100 Subject: ARM: KVM: enforce maximum size for identity mapped code We're about to move to an init procedure where we rely on the fact that the init code fits in a single page. Make sure we align the idmap text on a vector alignment, and that the code is not bigger than a single page. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index b571484..a871b8e 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -20,7 +20,7 @@ VMLINUX_SYMBOL(__idmap_text_start) = .; \ *(.idmap.text) \ VMLINUX_SYMBOL(__idmap_text_end) = .; \ - ALIGN_FUNCTION(); \ + . = ALIGN(32); \ VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ *(.hyp.idmap.text) \ VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; @@ -315,3 +315,8 @@ SECTIONS */ ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support") ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined") +/* + * The HYP init code can't be more than a page long. + * The above comment applies as well. + */ +ASSERT(((__hyp_idmap_text_end - __hyp_idmap_text_start) <= PAGE_SIZE), "HYP init code too big") -- cgit v0.10.2 From 4f728276fbf1e043010485d7e9275082a1c3d650 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 12 Apr 2013 19:12:05 +0100 Subject: ARM: KVM: rework HYP page table freeing There is no point in freeing HYP page tables differently from Stage-2. They now have the same requirements, and should be dealt with the same way. Promote unmap_stage2_range to be The One True Way, and get rid of a number of nasty bugs in the process (good thing we never actually called free_hyp_pmds before...). Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 3c71a1d..92eb20d 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -32,7 +32,7 @@ int create_hyp_mappings(void *from, void *to); int create_hyp_io_mappings(void *from, void *to, phys_addr_t); -void free_hyp_pmds(void); +void free_hyp_pgds(void); int kvm_alloc_stage2_pgd(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm *kvm); diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 7403f88..16f164a 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -928,7 +928,7 @@ static int init_hyp_mode(void) out_free_vfp: free_percpu(kvm_host_vfp_state); out_free_mappings: - free_hyp_pmds(); + free_hyp_pgds(); out_free_stack_pages: for_each_possible_cpu(cpu) free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index bfc5927..7464824 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -72,56 +72,104 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) return p; } -static void free_ptes(pmd_t *pmd, unsigned long addr) +static void clear_pud_entry(pud_t *pud) { - pte_t *pte; - unsigned int i; + pmd_t *pmd_table = pmd_offset(pud, 0); + pud_clear(pud); + pmd_free(NULL, pmd_table); + put_page(virt_to_page(pud)); +} - for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) { - if (!pmd_none(*pmd) && pmd_table(*pmd)) { - pte = pte_offset_kernel(pmd, addr); - pte_free_kernel(NULL, pte); - } - pmd++; +static void clear_pmd_entry(pmd_t *pmd) +{ + pte_t *pte_table = pte_offset_kernel(pmd, 0); + pmd_clear(pmd); + pte_free_kernel(NULL, pte_table); + put_page(virt_to_page(pmd)); +} + +static bool pmd_empty(pmd_t *pmd) +{ + struct page *pmd_page = virt_to_page(pmd); + return page_count(pmd_page) == 1; +} + +static void clear_pte_entry(pte_t *pte) +{ + if (pte_present(*pte)) { + kvm_set_pte(pte, __pte(0)); + put_page(virt_to_page(pte)); } } -static void free_hyp_pgd_entry(unsigned long addr) +static bool pte_empty(pte_t *pte) +{ + struct page *pte_page = virt_to_page(pte); + return page_count(pte_page) == 1; +} + +static void unmap_range(pgd_t *pgdp, unsigned long long start, u64 size) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; - unsigned long hyp_addr = KERN_TO_HYP(addr); + pte_t *pte; + unsigned long long addr = start, end = start + size; + u64 range; - pgd = hyp_pgd + pgd_index(hyp_addr); - pud = pud_offset(pgd, hyp_addr); + while (addr < end) { + pgd = pgdp + pgd_index(addr); + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) { + addr += PUD_SIZE; + continue; + } - if (pud_none(*pud)) - return; - BUG_ON(pud_bad(*pud)); + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + addr += PMD_SIZE; + continue; + } - pmd = pmd_offset(pud, hyp_addr); - free_ptes(pmd, addr); - pmd_free(NULL, pmd); - pud_clear(pud); + pte = pte_offset_kernel(pmd, addr); + clear_pte_entry(pte); + range = PAGE_SIZE; + + /* If we emptied the pte, walk back up the ladder */ + if (pte_empty(pte)) { + clear_pmd_entry(pmd); + range = PMD_SIZE; + if (pmd_empty(pmd)) { + clear_pud_entry(pud); + range = PUD_SIZE; + } + } + + addr += range; + } } /** - * free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables + * free_hyp_pgds - free Hyp-mode page tables * - * Assumes this is a page table used strictly in Hyp-mode and therefore contains + * Assumes hyp_pgd is a page table used strictly in Hyp-mode and therefore contains * either mappings in the kernel memory area (above PAGE_OFFSET), or * device mappings in the vmalloc range (from VMALLOC_START to VMALLOC_END). */ -void free_hyp_pmds(void) +void free_hyp_pgds(void) { unsigned long addr; mutex_lock(&kvm_hyp_pgd_mutex); - for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) - free_hyp_pgd_entry(addr); - for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) - free_hyp_pgd_entry(addr); + + if (hyp_pgd) { + for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) + unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); + for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) + unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); + kfree(hyp_pgd); + } + mutex_unlock(&kvm_hyp_pgd_mutex); } @@ -136,6 +184,7 @@ static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, do { pte = pte_offset_kernel(pmd, addr); kvm_set_pte(pte, pfn_pte(pfn, prot)); + get_page(virt_to_page(pte)); pfn++; } while (addr += PAGE_SIZE, addr != end); } @@ -161,6 +210,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, return -ENOMEM; } pmd_populate_kernel(NULL, pmd, pte); + get_page(virt_to_page(pmd)); } next = pmd_addr_end(addr, end); @@ -197,6 +247,7 @@ static int __create_hyp_mappings(pgd_t *pgdp, goto out; } pud_populate(NULL, pud, pmd); + get_page(virt_to_page(pud)); } next = pgd_addr_end(addr, end); @@ -289,42 +340,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) return 0; } -static void clear_pud_entry(pud_t *pud) -{ - pmd_t *pmd_table = pmd_offset(pud, 0); - pud_clear(pud); - pmd_free(NULL, pmd_table); - put_page(virt_to_page(pud)); -} - -static void clear_pmd_entry(pmd_t *pmd) -{ - pte_t *pte_table = pte_offset_kernel(pmd, 0); - pmd_clear(pmd); - pte_free_kernel(NULL, pte_table); - put_page(virt_to_page(pmd)); -} - -static bool pmd_empty(pmd_t *pmd) -{ - struct page *pmd_page = virt_to_page(pmd); - return page_count(pmd_page) == 1; -} - -static void clear_pte_entry(pte_t *pte) -{ - if (pte_present(*pte)) { - kvm_set_pte(pte, __pte(0)); - put_page(virt_to_page(pte)); - } -} - -static bool pte_empty(pte_t *pte) -{ - struct page *pte_page = virt_to_page(pte); - return page_count(pte_page) == 1; -} - /** * unmap_stage2_range -- Clear stage2 page table entries to unmap a range * @kvm: The VM pointer @@ -338,43 +353,7 @@ static bool pte_empty(pte_t *pte) */ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - phys_addr_t addr = start, end = start + size; - u64 range; - - while (addr < end) { - pgd = kvm->arch.pgd + pgd_index(addr); - pud = pud_offset(pgd, addr); - if (pud_none(*pud)) { - addr += PUD_SIZE; - continue; - } - - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - addr += PMD_SIZE; - continue; - } - - pte = pte_offset_kernel(pmd, addr); - clear_pte_entry(pte); - range = PAGE_SIZE; - - /* If we emptied the pte, walk back up the ladder */ - if (pte_empty(pte)) { - clear_pmd_entry(pmd); - range = PMD_SIZE; - if (pmd_empty(pmd)) { - clear_pud_entry(pud); - range = PUD_SIZE; - } - } - - addr += range; - } + unmap_range(kvm->arch.pgd, start, size); } /** @@ -741,7 +720,7 @@ int kvm_mmu_init(void) return 0; out: - kfree(hyp_pgd); + free_hyp_pgds(); return err; } -- cgit v0.10.2 From 5a677ce044f18a341ab942e23516e52ad89f7687 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 12 Apr 2013 19:12:06 +0100 Subject: ARM: KVM: switch to a dual-step HYP init code Our HYP init code suffers from two major design issues: - it cannot support CPU hotplug, as we tear down the idmap very early - it cannot perform a TLB invalidation when switching from init to runtime mappings, as pages are manipulated from PL1 exclusively The hotplug problem mandates that we keep two sets of page tables (boot and runtime). The TLB problem mandates that we're able to transition from one PGD to another while in HYP, invalidating the TLBs in the process. To be able to do this, we need to share a page between the two page tables. A page that will have the same VA in both configurations. All we need is a VA that has the following properties: - This VA can't be used to represent a kernel mapping. - This VA will not conflict with the physical address of the kernel text The vectors page seems to satisfy this requirement: - The kernel never maps anything else there - The kernel text being copied at the beginning of the physical memory, it is unlikely to use the last 64kB (I doubt we'll ever support KVM on a system with something like 4MB of RAM, but patches are very welcome). Let's call this VA the trampoline VA. Now, we map our init page at 3 locations: - idmap in the boot pgd - trampoline VA in the boot pgd - trampoline VA in the runtime pgd The init scenario is now the following: - We jump in HYP with four parameters: boot HYP pgd, runtime HYP pgd, runtime stack, runtime vectors - Enable the MMU with the boot pgd - Jump to a target into the trampoline page (remember, this is the same physical page!) - Now switch to the runtime pgd (same VA, and still the same physical page!) - Invalidate TLBs - Set stack and vectors - Profit! (or eret, if you only care about the code). Note that we keep the boot mapping permanently (it is not strictly an idmap anymore) to allow for CPU hotplug in later patches. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 78813b8..6c2a35d 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -188,23 +188,30 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); -static inline void __cpu_init_hyp_mode(unsigned long long pgd_ptr, +static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr, + unsigned long long pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) { - unsigned long pgd_low, pgd_high; - - pgd_low = (pgd_ptr & ((1ULL << 32) - 1)); - pgd_high = (pgd_ptr >> 32ULL); - /* - * Call initialization code, and switch to the full blown - * HYP code. The init code doesn't need to preserve these registers as - * r1-r3 and r12 are already callee save according to the AAPCS. - * Note that we slightly misuse the prototype by casing the pgd_low to - * a void *. + * Call initialization code, and switch to the full blown HYP + * code. The init code doesn't need to preserve these + * registers as r0-r3 are already callee saved according to + * the AAPCS. + * Note that we slightly misuse the prototype by casing the + * stack pointer to a void *. + * + * We don't have enough registers to perform the full init in + * one go. Install the boot PGD first, and then install the + * runtime PGD, stack pointer and vectors. The PGDs are always + * passed as the third argument, in order to be passed into + * r2-r3 to the init code (yes, this is compliant with the + * PCS!). */ - kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr); + + kvm_call_hyp(NULL, 0, boot_pgd_ptr); + + kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr); } int kvm_perf_init(void); diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 92eb20d..24b767a 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -19,17 +19,29 @@ #ifndef __ARM_KVM_MMU_H__ #define __ARM_KVM_MMU_H__ -#include -#include +#include +#include /* * We directly use the kernel VA for the HYP, as we can directly share * the mapping (HTTBR "covers" TTBR1). */ -#define HYP_PAGE_OFFSET_MASK (~0UL) +#define HYP_PAGE_OFFSET_MASK UL(~0) #define HYP_PAGE_OFFSET PAGE_OFFSET #define KERN_TO_HYP(kva) (kva) +/* + * Our virtual mapping for the boot-time MMU-enable code. Must be + * shared across all the page-tables. Conveniently, we use the vectors + * page, where no kernel data will ever be shared with HYP. + */ +#define TRAMPOLINE_VA UL(CONFIG_VECTORS_BASE) + +#ifndef __ASSEMBLY__ + +#include +#include + int create_hyp_mappings(void *from, void *to); int create_hyp_io_mappings(void *from, void *to, phys_addr_t); void free_hyp_pgds(void); @@ -44,6 +56,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); +phys_addr_t kvm_mmu_get_boot_httbr(void); +phys_addr_t kvm_get_idmap_vector(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); @@ -113,4 +127,8 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) } } +#define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) + +#endif /* !__ASSEMBLY__ */ + #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 16f164a..fc47bd7 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -787,20 +787,22 @@ long kvm_arch_vm_ioctl(struct file *filp, static void cpu_init_hyp_mode(void *vector) { + unsigned long long boot_pgd_ptr; unsigned long long pgd_ptr; unsigned long hyp_stack_ptr; unsigned long stack_page; unsigned long vector_ptr; /* Switch from the HYP stub to our own HYP init vector */ - __hyp_set_vectors((unsigned long)vector); + __hyp_set_vectors(kvm_get_idmap_vector()); + boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr(); pgd_ptr = (unsigned long long)kvm_mmu_get_httbr(); stack_page = __get_cpu_var(kvm_arm_hyp_stack_page); hyp_stack_ptr = stack_page + PAGE_SIZE; vector_ptr = (unsigned long)__kvm_hyp_vector; - __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); + __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr); } /** @@ -854,11 +856,6 @@ static int init_hyp_mode(void) } /* - * Unmap the identity mapping - */ - kvm_clear_hyp_idmap(); - - /* * Map the Hyp-code called directly from the host */ err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end); diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S index 9f37a79..f048338 100644 --- a/arch/arm/kvm/init.S +++ b/arch/arm/kvm/init.S @@ -21,13 +21,33 @@ #include #include #include +#include /******************************************************************** * Hypervisor initialization * - should be called with: - * r0,r1 = Hypervisor pgd pointer - * r2 = top of Hyp stack (kernel VA) - * r3 = pointer to hyp vectors + * r0 = top of Hyp stack (kernel VA) + * r1 = pointer to hyp vectors + * r2,r3 = Hypervisor pgd pointer + * + * The init scenario is: + * - We jump in HYP with four parameters: boot HYP pgd, runtime HYP pgd, + * runtime stack, runtime vectors + * - Enable the MMU with the boot pgd + * - Jump to a target into the trampoline page (remember, this is the same + * physical page!) + * - Now switch to the runtime pgd (same VA, and still the same physical + * page!) + * - Invalidate TLBs + * - Set stack and vectors + * - Profit! (or eret, if you only care about the code). + * + * As we only have four registers available to pass parameters (and we + * need six), we split the init in two phases: + * - Phase 1: r0 = 0, r1 = 0, r2,r3 contain the boot PGD. + * Provides the basic HYP init, and enable the MMU. + * - Phase 2: r0 = ToS, r1 = vectors, r2,r3 contain the runtime PGD. + * Switches to the runtime PGD, set stack and vectors. */ .text @@ -47,22 +67,25 @@ __kvm_hyp_init: W(b) . __do_hyp_init: + cmp r0, #0 @ We have a SP? + bne phase2 @ Yes, second stage init + @ Set the HTTBR to point to the hypervisor PGD pointer passed - mcrr p15, 4, r0, r1, c2 + mcrr p15, 4, r2, r3, c2 @ Set the HTCR and VTCR to the same shareability and cacheability @ settings as the non-secure TTBCR and with T0SZ == 0. mrc p15, 4, r0, c2, c0, 2 @ HTCR - ldr r12, =HTCR_MASK - bic r0, r0, r12 + ldr r2, =HTCR_MASK + bic r0, r0, r2 mrc p15, 0, r1, c2, c0, 2 @ TTBCR and r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ) orr r0, r0, r1 mcr p15, 4, r0, c2, c0, 2 @ HTCR mrc p15, 4, r1, c2, c1, 2 @ VTCR - ldr r12, =VTCR_MASK - bic r1, r1, r12 + ldr r2, =VTCR_MASK + bic r1, r1, r2 bic r0, r0, #(~VTCR_HTCR_SH) @ clear non-reusable HTCR bits orr r1, r0, r1 orr r1, r1, #(KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S) @@ -85,24 +108,41 @@ __do_hyp_init: @ - Memory alignment checks: enabled @ - MMU: enabled (this code must be run from an identity mapping) mrc p15, 4, r0, c1, c0, 0 @ HSCR - ldr r12, =HSCTLR_MASK - bic r0, r0, r12 + ldr r2, =HSCTLR_MASK + bic r0, r0, r2 mrc p15, 0, r1, c1, c0, 0 @ SCTLR - ldr r12, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C) - and r1, r1, r12 - ARM( ldr r12, =(HSCTLR_M | HSCTLR_A) ) - THUMB( ldr r12, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) ) - orr r1, r1, r12 + ldr r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C) + and r1, r1, r2 + ARM( ldr r2, =(HSCTLR_M | HSCTLR_A) ) + THUMB( ldr r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) ) + orr r1, r1, r2 orr r0, r0, r1 isb mcr p15, 4, r0, c1, c0, 0 @ HSCR - isb - @ Set stack pointer and return to the kernel - mov sp, r2 + @ End of init phase-1 + eret + +phase2: + @ Set stack pointer + mov sp, r0 @ Set HVBAR to point to the HYP vectors - mcr p15, 4, r3, c12, c0, 0 @ HVBAR + mcr p15, 4, r1, c12, c0, 0 @ HVBAR + + @ Jump to the trampoline page + ldr r0, =TRAMPOLINE_VA + adr r1, target + bfi r0, r1, #0, #PAGE_SHIFT + mov pc, r0 + +target: @ We're now in the trampoline code, switch page tables + mcrr p15, 4, r2, r3, c2 + isb + + @ Invalidate the old TLBs + mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH + dsb eret diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 7464824..4646c17 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -32,9 +32,15 @@ extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; +static pgd_t *boot_hyp_pgd; static pgd_t *hyp_pgd; static DEFINE_MUTEX(kvm_hyp_pgd_mutex); +static void *init_bounce_page; +static unsigned long hyp_idmap_start; +static unsigned long hyp_idmap_end; +static phys_addr_t hyp_idmap_vector; + static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); @@ -152,9 +158,12 @@ static void unmap_range(pgd_t *pgdp, unsigned long long start, u64 size) /** * free_hyp_pgds - free Hyp-mode page tables * - * Assumes hyp_pgd is a page table used strictly in Hyp-mode and therefore contains - * either mappings in the kernel memory area (above PAGE_OFFSET), or - * device mappings in the vmalloc range (from VMALLOC_START to VMALLOC_END). + * Assumes hyp_pgd is a page table used strictly in Hyp-mode and + * therefore contains either mappings in the kernel memory area (above + * PAGE_OFFSET), or device mappings in the vmalloc range (from + * VMALLOC_START to VMALLOC_END). + * + * boot_hyp_pgd should only map two pages for the init code. */ void free_hyp_pgds(void) { @@ -162,6 +171,12 @@ void free_hyp_pgds(void) mutex_lock(&kvm_hyp_pgd_mutex); + if (boot_hyp_pgd) { + unmap_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); + unmap_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); + kfree(boot_hyp_pgd); + } + if (hyp_pgd) { for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); @@ -170,6 +185,7 @@ void free_hyp_pgds(void) kfree(hyp_pgd); } + kfree(init_bounce_page); mutex_unlock(&kvm_hyp_pgd_mutex); } @@ -185,6 +201,7 @@ static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, pte = pte_offset_kernel(pmd, addr); kvm_set_pte(pte, pfn_pte(pfn, prot)); get_page(virt_to_page(pte)); + kvm_flush_dcache_to_poc(pte, sizeof(*pte)); pfn++; } while (addr += PAGE_SIZE, addr != end); } @@ -211,6 +228,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, } pmd_populate_kernel(NULL, pmd, pte); get_page(virt_to_page(pmd)); + kvm_flush_dcache_to_poc(pmd, sizeof(*pmd)); } next = pmd_addr_end(addr, end); @@ -248,6 +266,7 @@ static int __create_hyp_mappings(pgd_t *pgdp, } pud_populate(NULL, pud, pmd); get_page(virt_to_page(pud)); + kvm_flush_dcache_to_poc(pud, sizeof(*pud)); } next = pgd_addr_end(addr, end); @@ -689,18 +708,64 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) phys_addr_t kvm_mmu_get_httbr(void) { - VM_BUG_ON(!virt_addr_valid(hyp_pgd)); return virt_to_phys(hyp_pgd); } +phys_addr_t kvm_mmu_get_boot_httbr(void) +{ + return virt_to_phys(boot_hyp_pgd); +} + +phys_addr_t kvm_get_idmap_vector(void) +{ + return hyp_idmap_vector; +} + int kvm_mmu_init(void) { - unsigned long hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start); - unsigned long hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end); int err; + hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start); + hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end); + hyp_idmap_vector = virt_to_phys(__kvm_hyp_init); + + if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) { + /* + * Our init code is crossing a page boundary. Allocate + * a bounce page, copy the code over and use that. + */ + size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start; + phys_addr_t phys_base; + + init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!init_bounce_page) { + kvm_err("Couldn't allocate HYP init bounce page\n"); + err = -ENOMEM; + goto out; + } + + memcpy(init_bounce_page, __hyp_idmap_text_start, len); + /* + * Warning: the code we just copied to the bounce page + * must be flushed to the point of coherency. + * Otherwise, the data may be sitting in L2, and HYP + * mode won't be able to observe it as it runs with + * caches off at that point. + */ + kvm_flush_dcache_to_poc(init_bounce_page, len); + + phys_base = virt_to_phys(init_bounce_page); + hyp_idmap_vector += phys_base - hyp_idmap_start; + hyp_idmap_start = phys_base; + hyp_idmap_end = phys_base + len; + + kvm_info("Using HYP init bounce page @%lx\n", + (unsigned long)phys_base); + } + hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); - if (!hyp_pgd) { + boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + if (!hyp_pgd || !boot_hyp_pgd) { kvm_err("Hyp mode PGD not allocated\n"); err = -ENOMEM; goto out; @@ -718,39 +783,30 @@ int kvm_mmu_init(void) goto out; } + /* Map the very same page at the trampoline VA */ + err = __create_hyp_mappings(boot_hyp_pgd, + TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP); + if (err) { + kvm_err("Failed to map trampoline @%lx into boot HYP pgd\n", + TRAMPOLINE_VA); + goto out; + } + + /* Map the same page again into the runtime page tables */ + err = __create_hyp_mappings(hyp_pgd, + TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP); + if (err) { + kvm_err("Failed to map trampoline @%lx into runtime HYP pgd\n", + TRAMPOLINE_VA); + goto out; + } + return 0; out: free_hyp_pgds(); return err; } - -/** - * kvm_clear_idmap - remove all idmaps from the hyp pgd - * - * Free the underlying pmds for all pgds in range and clear the pgds (but - * don't free them) afterwards. - */ -void kvm_clear_hyp_idmap(void) -{ - unsigned long addr, end; - unsigned long next; - pgd_t *pgd = hyp_pgd; - pud_t *pud; - pmd_t *pmd; - - addr = virt_to_phys(__hyp_idmap_text_start); - end = virt_to_phys(__hyp_idmap_text_end); - - pgd += pgd_index(addr); - do { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) - continue; - pud = pud_offset(pgd, addr); - pmd = pmd_offset(pud, addr); - - pud_clear(pud); - kvm_clean_pmd_entry(pmd); - pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK)); - } while (pgd++, addr = next, addr < end); -} -- cgit v0.10.2 From d157f4a5155f4fbd0d1da66b3d2f504c13bd194d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 12 Apr 2013 19:12:07 +0100 Subject: ARM: KVM: perform HYP initilization for hotplugged CPUs Now that we have the necessary infrastructure to boot a hotplugged CPU at any point in time, wire a CPU notifier that will perform the HYP init for the incoming CPU. Note that this depends on the platform code and/or firmware to boot the incoming CPU with HYP mode enabled and return to the kernel by following the normal boot path (HYP stub installed). Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 24b767a..472ac70 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -44,6 +44,7 @@ int create_hyp_mappings(void *from, void *to); int create_hyp_io_mappings(void *from, void *to, phys_addr_t); +void free_boot_hyp_pgd(void); void free_hyp_pgds(void); int kvm_alloc_stage2_pgd(struct kvm *kvm); diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index fc47bd7..6ea2aed 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -16,6 +16,7 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include #include #include #include @@ -785,7 +786,7 @@ long kvm_arch_vm_ioctl(struct file *filp, } } -static void cpu_init_hyp_mode(void *vector) +static void cpu_init_hyp_mode(void *dummy) { unsigned long long boot_pgd_ptr; unsigned long long pgd_ptr; @@ -805,12 +806,28 @@ static void cpu_init_hyp_mode(void *vector) __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr); } +static int hyp_init_cpu_notify(struct notifier_block *self, + unsigned long action, void *cpu) +{ + switch (action) { + case CPU_STARTING: + case CPU_STARTING_FROZEN: + cpu_init_hyp_mode(NULL); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block hyp_init_cpu_nb = { + .notifier_call = hyp_init_cpu_notify, +}; + /** * Inits Hyp-mode on all online CPUs */ static int init_hyp_mode(void) { - phys_addr_t init_phys_addr; int cpu; int err = 0; @@ -843,19 +860,6 @@ static int init_hyp_mode(void) } /* - * Execute the init code on each CPU. - * - * Note: The stack is not mapped yet, so don't do anything else than - * initializing the hypervisor mode on each CPU using a local stack - * space for temporary storage. - */ - init_phys_addr = virt_to_phys(__kvm_hyp_init); - for_each_online_cpu(cpu) { - smp_call_function_single(cpu, cpu_init_hyp_mode, - (void *)(long)init_phys_addr, 1); - } - - /* * Map the Hyp-code called directly from the host */ err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end); @@ -900,6 +904,11 @@ static int init_hyp_mode(void) } /* + * Execute the init code on each CPU. + */ + on_each_cpu(cpu_init_hyp_mode, NULL, 1); + + /* * Init HYP view of VGIC */ err = kvm_vgic_hyp_init(); @@ -917,6 +926,10 @@ static int init_hyp_mode(void) if (err) goto out_free_mappings; +#ifndef CONFIG_HOTPLUG_CPU + free_boot_hyp_pgd(); +#endif + kvm_perf_init(); kvm_info("Hyp mode initialized successfully\n"); @@ -955,6 +968,12 @@ int kvm_arch_init(void *opaque) if (err) goto out_err; + err = register_cpu_notifier(&hyp_init_cpu_nb); + if (err) { + kvm_err("Cannot register HYP init CPU notifier (%d)\n", err); + goto out_err; + } + kvm_coproc_table_init(); return 0; out_err: diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 4646c17..9657065 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -156,6 +156,31 @@ static void unmap_range(pgd_t *pgdp, unsigned long long start, u64 size) } /** + * free_boot_hyp_pgd - free HYP boot page tables + * + * Free the HYP boot page tables. The bounce page is also freed. + */ +void free_boot_hyp_pgd(void) +{ + mutex_lock(&kvm_hyp_pgd_mutex); + + if (boot_hyp_pgd) { + unmap_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); + unmap_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); + kfree(boot_hyp_pgd); + boot_hyp_pgd = NULL; + } + + if (hyp_pgd) + unmap_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); + + kfree(init_bounce_page); + init_bounce_page = NULL; + + mutex_unlock(&kvm_hyp_pgd_mutex); +} + +/** * free_hyp_pgds - free Hyp-mode page tables * * Assumes hyp_pgd is a page table used strictly in Hyp-mode and @@ -169,13 +194,9 @@ void free_hyp_pgds(void) { unsigned long addr; - mutex_lock(&kvm_hyp_pgd_mutex); + free_boot_hyp_pgd(); - if (boot_hyp_pgd) { - unmap_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); - unmap_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - kfree(boot_hyp_pgd); - } + mutex_lock(&kvm_hyp_pgd_mutex); if (hyp_pgd) { for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) @@ -183,9 +204,9 @@ void free_hyp_pgds(void) for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); kfree(hyp_pgd); + hyp_pgd = NULL; } - kfree(init_bounce_page); mutex_unlock(&kvm_hyp_pgd_mutex); } -- cgit v0.10.2 From 17b1e31f9201fc102ee3ddd409988e47753e22f9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Apr 2013 16:47:18 +0100 Subject: ARM: KVM: add architecture specific hook for capabilities Most of the capabilities are common to both arm and arm64, but we still need to handle the exceptions. Introduce kvm_arch_dev_ioctl_check_extension, which both architectures implement (in the 32bit case, it just returns 0). Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 6c2a35d..dcfcbf5 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -214,6 +214,11 @@ static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr, kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr); } +static inline int kvm_arch_dev_ioctl_check_extension(long ext) +{ + return 0; +} + int kvm_perf_init(void); int kvm_perf_teardown(void); diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 6ea2aed..cc67caf 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -206,7 +206,7 @@ int kvm_dev_ioctl_check_extension(long ext) r = KVM_MAX_VCPUS; break; default: - r = 0; + r = kvm_arch_dev_ioctl_check_extension(ext); break; } return r; -- cgit v0.10.2 From 3de50da6901521f9e520b8eb47d092779512e83c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Apr 2013 16:47:19 +0100 Subject: ARM: KVM: promote vfp_host pointer to generic host cpu context We use the vfp_host pointer to store the host VFP context, should the guest start using VFP itself. Actually, we can use this pointer in a more generic way to store CPU speficic data, and arm64 is using it to dump the whole host state before switching to the guest. Simply rename the vfp_host field to host_cpu_context, and the corresponding type to kvm_cpu_context_t. No change in functionnality. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index dcfcbf5..57cb786 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -87,7 +87,7 @@ struct kvm_vcpu_fault_info { u32 hyp_pc; /* PC when exception was taken from Hyp mode */ }; -typedef struct vfp_hard_struct kvm_kernel_vfp_t; +typedef struct vfp_hard_struct kvm_cpu_context_t; struct kvm_vcpu_arch { struct kvm_regs regs; @@ -105,8 +105,10 @@ struct kvm_vcpu_arch { struct kvm_vcpu_fault_info fault; /* Floating point registers (VFP and Advanced SIMD/NEON) */ - kvm_kernel_vfp_t vfp_guest; - kvm_kernel_vfp_t *vfp_host; + struct vfp_hard_struct vfp_guest; + + /* Host FP context */ + kvm_cpu_context_t *host_cpu_context; /* VGIC state */ struct vgic_cpu vgic_cpu; diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index ee1ac39..92562a2 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -154,7 +154,7 @@ int main(void) DEFINE(VCPU_MIDR, offsetof(struct kvm_vcpu, arch.midr)); DEFINE(VCPU_CP15, offsetof(struct kvm_vcpu, arch.cp15)); DEFINE(VCPU_VFP_GUEST, offsetof(struct kvm_vcpu, arch.vfp_guest)); - DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.vfp_host)); + DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.host_cpu_context)); DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs)); DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs)); DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs)); diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index cc67caf..089c0a4 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -49,7 +49,7 @@ __asm__(".arch_extension virt"); #endif static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); -static kvm_kernel_vfp_t __percpu *kvm_host_vfp_state; +static kvm_cpu_context_t __percpu *kvm_host_cpu_state; static unsigned long hyp_default_vectors; /* Per-CPU variable containing the currently running vcpu. */ @@ -317,7 +317,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = cpu; - vcpu->arch.vfp_host = this_cpu_ptr(kvm_host_vfp_state); + vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state); /* * Check whether this vcpu requires the cache to be flushed on @@ -882,24 +882,24 @@ static int init_hyp_mode(void) } /* - * Map the host VFP structures + * Map the host CPU structures */ - kvm_host_vfp_state = alloc_percpu(kvm_kernel_vfp_t); - if (!kvm_host_vfp_state) { + kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t); + if (!kvm_host_cpu_state) { err = -ENOMEM; - kvm_err("Cannot allocate host VFP state\n"); + kvm_err("Cannot allocate host CPU state\n"); goto out_free_mappings; } for_each_possible_cpu(cpu) { - kvm_kernel_vfp_t *vfp; + kvm_cpu_context_t *cpu_ctxt; - vfp = per_cpu_ptr(kvm_host_vfp_state, cpu); - err = create_hyp_mappings(vfp, vfp + 1); + cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu); + err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1); if (err) { - kvm_err("Cannot map host VFP state: %d\n", err); - goto out_free_vfp; + kvm_err("Cannot map host CPU state: %d\n", err); + goto out_free_context; } } @@ -913,7 +913,7 @@ static int init_hyp_mode(void) */ err = kvm_vgic_hyp_init(); if (err) - goto out_free_vfp; + goto out_free_context; #ifdef CONFIG_KVM_ARM_VGIC vgic_present = true; @@ -935,8 +935,8 @@ static int init_hyp_mode(void) kvm_info("Hyp mode initialized successfully\n"); return 0; -out_free_vfp: - free_percpu(kvm_host_vfp_state); +out_free_context: + free_percpu(kvm_host_cpu_state); out_free_mappings: free_hyp_pgds(); out_free_stack_pages: -- cgit v0.10.2 From aa404ddf952fa59c07575529ce93435538a3aebe Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 22 Apr 2013 18:57:46 -0700 Subject: KVM: ARM: Fix API documentation for ONE_REG encoding Unless I'm mistaken, the size field was encoded 4 bits off and a wrong value was used for 64-bit FP registers. Signed-off-by: Christoffer Dall diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index c09d183..59fd9e6 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1814,22 +1814,22 @@ ARM registers are mapped using the lower 32 bits. The upper 16 of that is the register group type, or coprocessor number: ARM core registers have the following id bit patterns: - 0x4002 0000 0010 + 0x4020 0000 0010 ARM 32-bit CP15 registers have the following id bit patterns: - 0x4002 0000 000F + 0x4020 0000 000F ARM 64-bit CP15 registers have the following id bit patterns: - 0x4003 0000 000F + 0x4030 0000 000F ARM CCSIDR registers are demultiplexed by CSSELR value: - 0x4002 0000 0011 00 + 0x4020 0000 0011 00 ARM 32-bit VFP control registers have the following id bit patterns: - 0x4002 0000 0012 1 + 0x4020 0000 0012 1 ARM 64-bit FP registers have the following id bit patterns: - 0x4002 0000 0012 0 + 0x4030 0000 0012 0 4.69 KVM_GET_ONE_REG -- cgit v0.10.2 From d21a1c83c7595e387545632e44cd7797b76e19cc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 23 Apr 2013 18:30:36 +0200 Subject: ARM: KVM: define KVM_ARM_MAX_VCPUS unconditionally The CONFIG_KVM_ARM_MAX_VCPUS symbol is needed in order to build the kernel/context_tracking.c code, which includes the vgic data structures implictly through the kvm headers. Definining the symbol to zero on builds without KVM resolves this build error: In file included from include/linux/kvm_host.h:33:0, from kernel/context_tracking.c:18: arch/arm/include/asm/kvm_host.h:28:23: warning: "CONFIG_KVM_ARM_MAX_VCPUS" is not defined [-Wundef] #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS ^ arch/arm/include/asm/kvm_vgic.h:34:24: note: in expansion of macro 'KVM_MAX_VCPUS' #define VGIC_MAX_CPUS KVM_MAX_VCPUS ^ arch/arm/include/asm/kvm_vgic.h:38:6: note: in expansion of macro 'VGIC_MAX_CPUS' #if (VGIC_MAX_CPUS > 8) ^ In file included from arch/arm/include/asm/kvm_host.h:41:0, from include/linux/kvm_host.h:33, from kernel/context_tracking.c:18: arch/arm/include/asm/kvm_vgic.h:59:11: error: 'CONFIG_KVM_ARM_MAX_VCPUS' undeclared here (not in a function) } percpu[VGIC_MAX_CPUS]; ^ Signed-off-by: Arnd Bergmann Cc: Marc Zyngier Cc: Christoffer Dall Cc: Will Deacon Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 49dd64e..370e1a8 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -41,9 +41,9 @@ config KVM_ARM_HOST Provides host support for ARM processors. config KVM_ARM_MAX_VCPUS - int "Number maximum supported virtual CPUs per VM" - depends on KVM_ARM_HOST - default 4 + int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST + default 4 if KVM_ARM_HOST + default 0 help Static number of max supported virtual CPUs per VM. -- cgit v0.10.2 From df759217386fcff9c5e45547d1578270fd592a1b Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 23 Apr 2013 14:26:31 -0700 Subject: KVM: ARM: Fix spelling in error message s/unkown/unknown/ Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 26ad173..3d74a0b 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -115,7 +115,7 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || !arm_exit_handlers[hsr_ec]) { - kvm_err("Unkown exception class: hsr: %#08x\n", + kvm_err("Unknown exception class: hsr: %#08x\n", (unsigned int)kvm_vcpu_get_hsr(vcpu)); BUG(); } -- cgit v0.10.2 From d4e071ce6acf8d5eddb7615a953193a8b0ad7c38 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 17 Apr 2013 12:52:01 +0200 Subject: ARM: KVM: iterate over all CPUs for CPU compatibility check kvm_target_cpus() checks the compatibility of the used CPU with KVM, which is currently limited to ARM Cortex-A15 cores. However by calling it only once on any random CPU it assumes that all cores are the same, which is not necessarily the case (for example in Big.Little). [ I cut some of the commit message and changed the formatting of the code slightly to pass checkpatch and look more like the rest of the kvm/arm init code - Christoffer ] Signed-off-by: Andre Przywara Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 089c0a4..5bc99b4 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -947,21 +947,30 @@ out_err: return err; } +static void check_kvm_target_cpu(void *ret) +{ + *(int *)ret = kvm_target_cpu(); +} + /** * Initialize Hyp-mode and memory mappings on all CPUs. */ int kvm_arch_init(void *opaque) { int err; + int ret, cpu; if (!is_hyp_mode_available()) { kvm_err("HYP mode not available\n"); return -ENODEV; } - if (kvm_target_cpu() < 0) { - kvm_err("Target CPU not supported!\n"); - return -ENODEV; + for_each_online_cpu(cpu) { + smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1); + if (ret < 0) { + kvm_err("Error, CPU %d not supported!\n", cpu); + return -ENODEV; + } } err = init_hyp_mode(); -- cgit v0.10.2