Diffstat (limited to 'arch/arm/kvm/mmu.c')
-rw-r--r-- | arch/arm/kvm/mmu.c | 257
1 file changed, 44 insertions(+), 213 deletions(-)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 5809069..b0de86b 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -19,7 +19,6 @@
 #include <linux/mman.h>
 #include <linux/kvm_host.h>
 #include <linux/io.h>
-#include <linux/hugetlb.h>
 #include <trace/events/kvm.h>
 #include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
@@ -42,8 +41,6 @@ static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
-#define kvm_pmd_huge(_x)        (pmd_huge(_x) || pmd_trans_huge(_x))
-
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
         /*
@@ -96,29 +93,19 @@ static bool page_empty(void *ptr)
 
 static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
-        if (pud_huge(*pud)) {
-                pud_clear(pud);
-                kvm_tlb_flush_vmid_ipa(kvm, addr);
-        } else {
-                pmd_t *pmd_table = pmd_offset(pud, 0);
-                pud_clear(pud);
-                kvm_tlb_flush_vmid_ipa(kvm, addr);
-                pmd_free(NULL, pmd_table);
-        }
+        pmd_t *pmd_table = pmd_offset(pud, 0);
+        pud_clear(pud);
+        kvm_tlb_flush_vmid_ipa(kvm, addr);
+        pmd_free(NULL, pmd_table);
         put_page(virt_to_page(pud));
 }
 
 static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 {
-        if (kvm_pmd_huge(*pmd)) {
-                pmd_clear(pmd);
-                kvm_tlb_flush_vmid_ipa(kvm, addr);
-        } else {
-                pte_t *pte_table = pte_offset_kernel(pmd, 0);
-                pmd_clear(pmd);
-                kvm_tlb_flush_vmid_ipa(kvm, addr);
-                pte_free_kernel(NULL, pte_table);
-        }
+        pte_t *pte_table = pte_offset_kernel(pmd, 0);
+        pmd_clear(pmd);
+        kvm_tlb_flush_vmid_ipa(kvm, addr);
+        pte_free_kernel(NULL, pte_table);
         put_page(virt_to_page(pmd));
 }
 
@@ -149,32 +136,18 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
                         continue;
                 }
 
-                if (pud_huge(*pud)) {
-                        /*
-                         * If we are dealing with a huge pud, just clear it and
-                         * move on.
-                         */
-                        clear_pud_entry(kvm, pud, addr);
-                        addr = pud_addr_end(addr, end);
-                        continue;
-                }
-
                 pmd = pmd_offset(pud, addr);
                 if (pmd_none(*pmd)) {
                         addr = pmd_addr_end(addr, end);
                         continue;
                 }
 
-                if (!kvm_pmd_huge(*pmd)) {
-                        pte = pte_offset_kernel(pmd, addr);
-                        clear_pte_entry(kvm, pte, addr);
-                        next = addr + PAGE_SIZE;
-                }
+                pte = pte_offset_kernel(pmd, addr);
+                clear_pte_entry(kvm, pte, addr);
+                next = addr + PAGE_SIZE;
 
-                /*
-                 * If the pmd entry is to be cleared, walk back up the ladder
-                 */
-                if (kvm_pmd_huge(*pmd) || page_empty(pte)) {
+                /* If we emptied the pte, walk back up the ladder */
+                if (page_empty(pte)) {
                         clear_pmd_entry(kvm, pmd, addr);
                         next = pmd_addr_end(addr, end);
                         if (page_empty(pmd) && !page_empty(pud)) {
@@ -334,17 +307,6 @@ out:
         return err;
 }
 
-static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
-{
-        if (!is_vmalloc_addr(kaddr)) {
-                BUG_ON(!virt_addr_valid(kaddr));
-                return __pa(kaddr);
-        } else {
-                return page_to_phys(vmalloc_to_page(kaddr)) +
-                       offset_in_page(kaddr);
-        }
-}
-
 /**
  * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
  * @from:       The virtual kernel start address of the range
@@ -356,27 +318,16 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
  */
 int create_hyp_mappings(void *from, void *to)
 {
-        phys_addr_t phys_addr;
-        unsigned long virt_addr;
+        unsigned long phys_addr = virt_to_phys(from);
         unsigned long start = KERN_TO_HYP((unsigned long)from);
         unsigned long end = KERN_TO_HYP((unsigned long)to);
 
-        start = start & PAGE_MASK;
-        end = PAGE_ALIGN(end);
-
-        for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
-                int err;
-
-                phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
-                err = __create_hyp_mappings(hyp_pgd, virt_addr,
-                                            virt_addr + PAGE_SIZE,
-                                            __phys_to_pfn(phys_addr),
-                                            PAGE_HYP);
-                if (err)
-                        return err;
-        }
+        /* Check for a valid kernel memory mapping */
+        if (!virt_addr_valid(from) || !virt_addr_valid(to - 1))
+                return -EINVAL;
 
-        return 0;
+        return __create_hyp_mappings(hyp_pgd, start, end,
+                                     __phys_to_pfn(phys_addr), PAGE_HYP);
 }
 
 /**
@@ -469,71 +420,29 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
         kvm->arch.pgd = NULL;
 }
 
-static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
-                             phys_addr_t addr)
+
+static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+                          phys_addr_t addr, const pte_t *new_pte, bool iomap)
 {
         pgd_t *pgd;
         pud_t *pud;
         pmd_t *pmd;
+        pte_t *pte, old_pte;
 
+        /* Create 2nd stage page table mapping - Level 1 */
         pgd = kvm->arch.pgd + pgd_index(addr);
         pud = pud_offset(pgd, addr);
         if (pud_none(*pud)) {
                 if (!cache)
-                        return NULL;
+                        return 0; /* ignore calls from kvm_set_spte_hva */
                 pmd = mmu_memory_cache_alloc(cache);
                 pud_populate(NULL, pud, pmd);
                 get_page(virt_to_page(pud));
         }
 
-        return pmd_offset(pud, addr);
-}
-
-static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
-                               *cache, phys_addr_t addr, const pmd_t *new_pmd)
-{
-        pmd_t *pmd, old_pmd;
-
-        pmd = stage2_get_pmd(kvm, cache, addr);
-        VM_BUG_ON(!pmd);
-
-        /*
-         * Mapping in huge pages should only happen through a fault.  If a
-         * page is merged into a transparent huge page, the individual
-         * subpages of that huge page should be unmapped through MMU
-         * notifiers before we get here.
-         *
-         * Merging of CompoundPages is not supported; they should instead be
-         * split first, unmapped, merged, and mapped back in on-demand.
-         */
-        VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
-
-        old_pmd = *pmd;
-        kvm_set_pmd(pmd, *new_pmd);
-        if (pmd_present(old_pmd))
-                kvm_tlb_flush_vmid_ipa(kvm, addr);
-        else
-                get_page(virt_to_page(pmd));
-        return 0;
-}
-
-static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
-                          phys_addr_t addr, const pte_t *new_pte, bool iomap)
-{
-        pmd_t *pmd;
-        pte_t *pte, old_pte;
-
-        /* Create stage-2 page table mapping - Level 1 */
-        pmd = stage2_get_pmd(kvm, cache, addr);
-        if (!pmd) {
-                /*
-                 * Ignore calls from kvm_set_spte_hva for unallocated
-                 * address ranges.
-                 */
-                return 0;
-        }
+        pmd = pmd_offset(pud, addr);
 
-        /* Create stage-2 page mappings - Level 2 */
+        /* Create 2nd stage page table mapping - Level 2 */
         if (pmd_none(*pmd)) {
                 if (!cache)
                         return 0; /* ignore calls from kvm_set_spte_hva */
@@ -598,60 +507,16 @@ out:
         return ret;
 }
 
-static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
-{
-        pfn_t pfn = *pfnp;
-        gfn_t gfn = *ipap >> PAGE_SHIFT;
-
-        if (PageTransCompound(pfn_to_page(pfn))) {
-                unsigned long mask;
-                /*
-                 * The address we faulted on is backed by a transparent huge
-                 * page.  However, because we map the compound huge page and
-                 * not the individual tail page, we need to transfer the
-                 * refcount to the head page.  We have to be careful that the
-                 * THP doesn't start to split while we are adjusting the
-                 * refcounts.
-                 *
-                 * We are sure this doesn't happen, because mmu_notifier_retry
-                 * was successful and we are holding the mmu_lock, so if this
-                 * THP is trying to split, it will be blocked in the mmu
-                 * notifier before touching any of the pages, specifically
-                 * before being able to call __split_huge_page_refcount().
-                 *
-                 * We can therefore safely transfer the refcount from PG_tail
-                 * to PG_head and switch the pfn from a tail page to the head
-                 * page accordingly.
-                 */
-                mask = PTRS_PER_PMD - 1;
-                VM_BUG_ON((gfn & mask) != (pfn & mask));
-                if (pfn & mask) {
-                        *ipap &= PMD_MASK;
-                        kvm_release_pfn_clean(pfn);
-                        pfn &= ~mask;
-                        kvm_get_pfn(pfn);
-                        *pfnp = pfn;
-                }
-
-                return true;
-        }
-
-        return false;
-}
-
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-                          struct kvm_memory_slot *memslot,
+                          gfn_t gfn, struct kvm_memory_slot *memslot,
                           unsigned long fault_status)
 {
+        pte_t new_pte;
+        pfn_t pfn;
         int ret;
-        bool write_fault, writable, hugetlb = false, force_pte = false;
+        bool write_fault, writable;
         unsigned long mmu_seq;
-        gfn_t gfn = fault_ipa >> PAGE_SHIFT;
-        unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
-        struct kvm *kvm = vcpu->kvm;
         struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
-        struct vm_area_struct *vma;
-        pfn_t pfn;
 
         write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
         if (fault_status == FSC_PERM && !write_fault) {
@@ -659,26 +524,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                 return -EFAULT;
         }
 
-        /* Let's check if we will get back a huge page backed by hugetlbfs */
-        down_read(&current->mm->mmap_sem);
-        vma = find_vma_intersection(current->mm, hva, hva + 1);
-        if (is_vm_hugetlb_page(vma)) {
-                hugetlb = true;
-                gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
-        } else {
-                /*
-                 * Pages belonging to VMAs not aligned to the PMD mapping
-                 * granularity cannot be mapped using block descriptors even
-                 * if the pages belong to a THP for the process, because the
-                 * stage-2 block descriptor will cover more than a single THP
-                 * and we lose atomicity for unmapping, updates, and splits
-                 * of the THP or other pages in the stage-2 block range.
-                 */
-                if (vma->vm_start & ~PMD_MASK)
-                        force_pte = true;
-        }
-        up_read(&current->mm->mmap_sem);
-
         /* We need minimum second+third level pages */
         ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
         if (ret)
@@ -696,40 +541,26 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
          */
         smp_rmb();
 
-        pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
+        pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
         if (is_error_pfn(pfn))
                 return -EFAULT;
 
-        spin_lock(&kvm->mmu_lock);
-        if (mmu_notifier_retry(kvm, mmu_seq))
+        new_pte = pfn_pte(pfn, PAGE_S2);
+        coherent_icache_guest_page(vcpu->kvm, gfn);
+
+        spin_lock(&vcpu->kvm->mmu_lock);
+        if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
                 goto out_unlock;
-        if (!hugetlb && !force_pte)
-                hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
-
-        if (hugetlb) {
-                pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2);
-                new_pmd = pmd_mkhuge(new_pmd);
-                if (writable) {
-                        kvm_set_s2pmd_writable(&new_pmd);
-                        kvm_set_pfn_dirty(pfn);
-                }
-                coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE);
-                ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
-        } else {
-                pte_t new_pte = pfn_pte(pfn, PAGE_S2);
-                if (writable) {
-                        kvm_set_s2pte_writable(&new_pte);
-                        kvm_set_pfn_dirty(pfn);
-                }
-                coherent_icache_guest_page(kvm, hva, PAGE_SIZE);
-                ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
+        if (writable) {
+                kvm_set_s2pte_writable(&new_pte);
+                kvm_set_pfn_dirty(pfn);
         }
-
+        stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);
 
 out_unlock:
-        spin_unlock(&kvm->mmu_lock);
+        spin_unlock(&vcpu->kvm->mmu_lock);
         kvm_release_pfn_clean(pfn);
-        return ret;
+        return 0;
 }
 
 /**
@@ -798,7 +629,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
         memslot = gfn_to_memslot(vcpu->kvm, gfn);
 
-        ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status);
+        ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
         if (ret == 0)
                 ret = 1;
 out_unlock:
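A note on the create_hyp_mappings() hunk above: virt_to_phys() is only defined for addresses in the kernel's linear map, which is why the rewritten function now returns -EINVAL when virt_addr_valid() rejects either end of the range. The deleted kvm_kaddr_to_phys() helper additionally handled vmalloc addresses, which are only virtually contiguous and must be resolved one page at a time. A minimal sketch of that distinction, reconstructed from the removed lines above (not part of this commit):

        #include <linux/mm.h>
        #include <linux/vmalloc.h>

        /* Sketch of the removed helper: resolve any kernel virtual address,
         * linear-map or vmalloc, to a physical address. */
        static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
        {
                if (!is_vmalloc_addr(kaddr)) {
                        /* Linear map: physical address is a fixed offset away */
                        BUG_ON(!virt_addr_valid(kaddr));
                        return __pa(kaddr);
                }
                /* vmalloc area: look up the backing page for this address and
                 * add the offset within that page. */
                return page_to_phys(vmalloc_to_page(kaddr)) +
                       offset_in_page(kaddr);
        }

With that helper gone, callers must pass linear-map addresses only, and the new virt_addr_valid() check enforces exactly that precondition.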
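Likewise, the context lines surviving in the user_mem_abort() hunks show the MMU-notifier race handling that the simplified fault path still relies on: the notifier sequence count is sampled before the gfn is translated to a pfn, then rechecked under mmu_lock before the new PTE is installed, so a concurrent host-side unmap forces the fault to be retried instead of mapping a stale pfn. A condensed sketch of that ordering follows; the mmu_seq sampling sits just above the smp_rmb() context line and is assumed here rather than shown in this diff:

        /* Sample the notifier sequence count before the gfn -> pfn lookup;
         * the barrier pairs with the write side in the MMU notifier. */
        mmu_seq = vcpu->kvm->mmu_notifier_seq;
        smp_rmb();

        pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);

        spin_lock(&vcpu->kvm->mmu_lock);
        if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
                goto out_unlock;        /* host unmapped it meanwhile: refault */

        /* Safe to install the mapping: no invalidation ran since mmu_seq. */
        stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);

out_unlock:
        spin_unlock(&vcpu->kvm->mmu_lock);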