From ba6a3541545542721ce821d1e7e5ce35752e6fdf Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 9 Sep 2013 13:52:33 +0200 Subject: KVM: mmu: allow page tables to be in read-only slots Page tables in a read-only memory slot will currently cause a triple fault because the page walker uses gfn_to_hva and it fails on such a slot. OVMF uses such a page table; however, real hardware seems to be fine with that as long as the accessed/dirty bits are set. Save whether the slot is readonly, and later check it when updating the accessed and dirty bits. Reviewed-by: Xiao Guangrong Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 0433301..ad75d77 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -99,6 +99,7 @@ struct guest_walker { pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; + bool pte_writable[PT_MAX_FULL_LEVELS]; unsigned pt_access; unsigned pte_access; gfn_t gfn; @@ -235,6 +236,22 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, if (pte == orig_pte) continue; + /* + * If the slot is read-only, simply do not process the accessed + * and dirty bits. This is the correct thing to do if the slot + * is ROM, and page tables in read-as-ROM/write-as-MMIO slots + * are only supported if the accessed and dirty bits are already + * set in the ROM (so that MMIO writes are never needed). + * + * Note that NPT does not allow this at all and faults, since + * it always wants nested page table entries for the guest + * page tables to be writable. And EPT works but will simply + * overwrite the read-only memory to set the accessed and dirty + * bits. + */ + if (unlikely(!walker->pte_writable[level - 1])) + continue; + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte); if (ret) return ret; @@ -309,7 +326,8 @@ retry_walk: goto error; real_gfn = gpa_to_gfn(real_gfn); - host_addr = gfn_to_hva(vcpu->kvm, real_gfn); + host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn, + &walker->pte_writable[walker->level - 1]); if (unlikely(kvm_is_error_hva(host_addr))) goto error; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ca645a0..0fbbc7a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -533,6 +533,7 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); +unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index bf040c4..979bff4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1058,11 +1058,15 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) EXPORT_SYMBOL_GPL(gfn_to_hva); /* - * The hva returned by this function is only allowed to be read. - * It should pair with kvm_read_hva() or kvm_read_hva_atomic(). + * If writable is set to false, the hva returned by this function is only + * allowed to be read. */ -static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn) +unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) { + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + if (writable) + *writable = !memslot_is_readonly(slot); + return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false); } @@ -1430,7 +1434,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int r; unsigned long addr; - addr = gfn_to_hva_read(kvm, gfn); + addr = gfn_to_hva_prot(kvm, gfn, NULL); if (kvm_is_error_hva(addr)) return -EFAULT; r = kvm_read_hva(data, (void __user *)addr + offset, len); @@ -1468,7 +1472,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, gfn_t gfn = gpa >> PAGE_SHIFT; int offset = offset_in_page(gpa); - addr = gfn_to_hva_read(kvm, gfn); + addr = gfn_to_hva_prot(kvm, gfn, NULL); if (kvm_is_error_hva(addr)) return -EFAULT; pagefault_disable(); -- cgit v0.10.2