summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- arch/arm/include/asm/kvm_mmu.h 43
-rw-r--r-- arch/arm/include/asm/stage2_pgtable.h 61
-rw-r--r-- arch/arm/kvm/arm.c 2
-rw-r--r-- arch/arm/kvm/mmu.c 408
-rw-r--r-- arch/arm64/include/asm/kvm_arm.h 85
-rw-r--r-- arch/arm64/include/asm/kvm_mmu.h 111
-rw-r--r-- arch/arm64/include/asm/pgtable-hwdef.h 80
-rw-r--r-- arch/arm64/include/asm/pgtable.h 15
-rw-r--r-- arch/arm64/include/asm/stage2_pgtable-nopmd.h 42
-rw-r--r-- arch/arm64/include/asm/stage2_pgtable-nopud.h 39
-rw-r--r-- arch/arm64/include/asm/stage2_pgtable.h 142
-rw-r--r-- arch/arm64/kvm/Kconfig 1
-rw-r--r-- arch/arm64/kvm/hyp/s2-setup.c 8
-rw-r--r-- drivers/clocksource/arm_arch_timer.c 11
-rw-r--r-- drivers/irqchip/irq-gic-common.c 13
-rw-r--r-- drivers/irqchip/irq-gic-common.h 3
-rw-r--r-- drivers/irqchip/irq-gic-v3.c 176
-rw-r--r-- drivers/irqchip/irq-gic.c 87
-rw-r--r-- include/clocksource/arm_arch_timer.h 12
-rw-r--r-- include/kvm/arm_vgic.h 7
-rw-r--r-- include/linux/irqchip/arm-gic-common.h 34
-rw-r--r-- virt/kvm/arm/arch_timer.c 40
-rw-r--r-- virt/kvm/arm/vgic-v2.c 61
-rw-r--r-- virt/kvm/arm/vgic-v3.c 47
-rw-r--r-- virt/kvm/arm/vgic.c 50
25 files changed, 1053 insertions, 525 deletions
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index da44be9..ef0b276 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -47,6 +47,7 @@
#include <linux/highmem.h>
#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
+#include <asm/stage2_pgtable.h>
int create_hyp_mappings(void *from, void *to);
int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
@@ -105,14 +106,16 @@ static inline void kvm_clean_pte(pte_t *pte)
clean_pte_table(pte);
}
-static inline void kvm_set_s2pte_writable(pte_t *pte)
+static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
{
- pte_val(*pte) |= L_PTE_S2_RDWR;
+ pte_val(pte) |= L_PTE_S2_RDWR;
+ return pte;
}
-static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
+static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
{
- pmd_val(*pmd) |= L_PMD_S2_RDWR;
+ pmd_val(pmd) |= L_PMD_S2_RDWR;
+ return pmd;
}
static inline void kvm_set_s2pte_readonly(pte_t *pte)
@@ -135,22 +138,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY;
}
-
-/* Open coded p*d_addr_end that can deal with 64bit addresses */
-#define kvm_pgd_addr_end(addr, end) \
-({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \
- (__boundary - 1 < (end) - 1)? __boundary: (end); \
-})
-
-#define kvm_pud_addr_end(addr,end) (end)
-
-#define kvm_pmd_addr_end(addr, end) \
-({ u64 __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \
- (__boundary - 1 < (end) - 1)? __boundary: (end); \
-})
-
-#define kvm_pgd_index(addr) pgd_index(addr)
-
static inline bool kvm_page_empty(void *ptr)
{
struct page *ptr_page = virt_to_page(ptr);
@@ -159,19 +146,11 @@ static inline bool kvm_page_empty(void *ptr)
#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
#define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
-#define kvm_pud_table_empty(kvm, pudp) (0)
-
-#define KVM_PREALLOC_LEVEL 0
+#define kvm_pud_table_empty(kvm, pudp) false
-static inline void *kvm_get_hwpgd(struct kvm *kvm)
-{
- return kvm->arch.pgd;
-}
-
-static inline unsigned int kvm_get_hwpgd_size(void)
-{
- return PTRS_PER_S2_PGD * sizeof(pgd_t);
-}
+#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
+#define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#define hyp_pud_table_empty(pudp) false
struct kvm;
diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h
new file mode 100644
index 0000000..460d616
--- /dev/null
+++ b/arch/arm/include/asm/stage2_pgtable.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2016 - ARM Ltd
+ *
+ * stage2 page table helpers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_S2_PGTABLE_H_
+#define __ARM_S2_PGTABLE_H_
+
+#define stage2_pgd_none(pgd) pgd_none(pgd)
+#define stage2_pgd_clear(pgd) pgd_clear(pgd)
+#define stage2_pgd_present(pgd) pgd_present(pgd)
+#define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud)
+#define stage2_pud_offset(pgd, address) pud_offset(pgd, address)
+#define stage2_pud_free(pud) pud_free(NULL, pud)
+
+#define stage2_pud_none(pud) pud_none(pud)
+#define stage2_pud_clear(pud) pud_clear(pud)
+#define stage2_pud_present(pud) pud_present(pud)
+#define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd)
+#define stage2_pmd_offset(pud, address) pmd_offset(pud, address)
+#define stage2_pmd_free(pmd) pmd_free(NULL, pmd)
+
+#define stage2_pud_huge(pud) pud_huge(pud)
+
+/* Open coded p*d_addr_end that can deal with 64bit addresses */
+static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end)
+{
+ phys_addr_t boundary = (addr + PGDIR_SIZE) & PGDIR_MASK;
+
+ return (boundary - 1 < end - 1) ? boundary : end;
+}
+
+#define stage2_pud_addr_end(addr, end) (end)
+
+static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end)
+{
+ phys_addr_t boundary = (addr + PMD_SIZE) & PMD_MASK;
+
+ return (boundary - 1 < end - 1) ? boundary : end;
+}
+
+#define stage2_pgd_index(addr) pgd_index(addr)
+
+#define stage2_pte_table_empty(ptep) kvm_page_empty(ptep)
+#define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#define stage2_pud_table_empty(pudp) false
+
+#endif /* __ARM_S2_PGTABLE_H_ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index dded1b7..be4b639 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -448,7 +448,7 @@ static void update_vttbr(struct kvm *kvm)
kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
/* update vttbr to be used with the new vmid */
- pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm));
+ pgd_phys = virt_to_phys(kvm->arch.pgd);
BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
kvm->arch.vttbr = pgd_phys | vmid;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 58dbd5c..783e5ff 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -43,11 +43,9 @@ static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;
+#define S2_PGD_SIZE (PTRS_PER_S2_PGD * sizeof(pgd_t))
#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
-#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x))
-#define kvm_pud_huge(_x) pud_huge(_x)
-
#define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0)
#define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1)
@@ -69,14 +67,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
- /*
- * This function also gets called when dealing with HYP page
- * tables. As HYP doesn't have an associated struct kvm (and
- * the HYP page tables are fairly static), we don't do
- * anything there.
- */
- if (kvm)
- kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
}
/*
@@ -115,7 +106,7 @@ static bool kvm_is_device_pfn(unsigned long pfn)
*/
static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
{
- if (!kvm_pmd_huge(*pmd))
+ if (!pmd_thp_or_huge(*pmd))
return;
pmd_clear(pmd);
@@ -155,29 +146,29 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
return p;
}
-static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
+static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
{
- pud_t *pud_table __maybe_unused = pud_offset(pgd, 0);
- pgd_clear(pgd);
+ pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL);
+ stage2_pgd_clear(pgd);
kvm_tlb_flush_vmid_ipa(kvm, addr);
- pud_free(NULL, pud_table);
+ stage2_pud_free(pud_table);
put_page(virt_to_page(pgd));
}
-static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
+static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
{
- pmd_t *pmd_table = pmd_offset(pud, 0);
- VM_BUG_ON(pud_huge(*pud));
- pud_clear(pud);
+ pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0);
+ VM_BUG_ON(stage2_pud_huge(*pud));
+ stage2_pud_clear(pud);
kvm_tlb_flush_vmid_ipa(kvm, addr);
- pmd_free(NULL, pmd_table);
+ stage2_pmd_free(pmd_table);
put_page(virt_to_page(pud));
}
-static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
+static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
{
pte_t *pte_table = pte_offset_kernel(pmd, 0);
- VM_BUG_ON(kvm_pmd_huge(*pmd));
+ VM_BUG_ON(pmd_thp_or_huge(*pmd));
pmd_clear(pmd);
kvm_tlb_flush_vmid_ipa(kvm, addr);
pte_free_kernel(NULL, pte_table);
@@ -204,7 +195,7 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
* the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
* the IO subsystem will never hit in the cache.
*/
-static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
+static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
phys_addr_t start_addr = addr;
@@ -226,21 +217,21 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
}
} while (pte++, addr += PAGE_SIZE, addr != end);
- if (kvm_pte_table_empty(kvm, start_pte))
- clear_pmd_entry(kvm, pmd, start_addr);
+ if (stage2_pte_table_empty(start_pte))
+ clear_stage2_pmd_entry(kvm, pmd, start_addr);
}
-static void unmap_pmds(struct kvm *kvm, pud_t *pud,
+static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
phys_addr_t addr, phys_addr_t end)
{
phys_addr_t next, start_addr = addr;
pmd_t *pmd, *start_pmd;
- start_pmd = pmd = pmd_offset(pud, addr);
+ start_pmd = pmd = stage2_pmd_offset(pud, addr);
do {
- next = kvm_pmd_addr_end(addr, end);
+ next = stage2_pmd_addr_end(addr, end);
if (!pmd_none(*pmd)) {
- if (kvm_pmd_huge(*pmd)) {
+ if (pmd_thp_or_huge(*pmd)) {
pmd_t old_pmd = *pmd;
pmd_clear(pmd);
@@ -250,57 +241,64 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
put_page(virt_to_page(pmd));
} else {
- unmap_ptes(kvm, pmd, addr, next);
+ unmap_stage2_ptes(kvm, pmd, addr, next);
}
}
} while (pmd++, addr = next, addr != end);
- if (kvm_pmd_table_empty(kvm, start_pmd))
- clear_pud_entry(kvm, pud, start_addr);
+ if (stage2_pmd_table_empty(start_pmd))
+ clear_stage2_pud_entry(kvm, pud, start_addr);
}
-static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
+static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd,
phys_addr_t addr, phys_addr_t end)
{
phys_addr_t next, start_addr = addr;
pud_t *pud, *start_pud;
- start_pud = pud = pud_offset(pgd, addr);
+ start_pud = pud = stage2_pud_offset(pgd, addr);
do {
- next = kvm_pud_addr_end(addr, end);
- if (!pud_none(*pud)) {
- if (pud_huge(*pud)) {
+ next = stage2_pud_addr_end(addr, end);
+ if (!stage2_pud_none(*pud)) {
+ if (stage2_pud_huge(*pud)) {
pud_t old_pud = *pud;
- pud_clear(pud);
+ stage2_pud_clear(pud);
kvm_tlb_flush_vmid_ipa(kvm, addr);
-
kvm_flush_dcache_pud(old_pud);
-
put_page(virt_to_page(pud));
} else {
- unmap_pmds(kvm, pud, addr, next);
+ unmap_stage2_pmds(kvm, pud, addr, next);
}
}
} while (pud++, addr = next, addr != end);
- if (kvm_pud_table_empty(kvm, start_pud))
- clear_pgd_entry(kvm, pgd, start_addr);
+ if (stage2_pud_table_empty(start_pud))
+ clear_stage2_pgd_entry(kvm, pgd, start_addr);
}
-
-static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
- phys_addr_t start, u64 size)
+/**
+ * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
+ * @kvm: The VM pointer
+ * @start: The intermediate physical base address of the range to unmap
+ * @size: The size of the area to unmap
+ *
+ * Clear a range of stage-2 mappings, lowering the various ref-counts. Must
+ * be called while holding mmu_lock (unless for freeing the stage2 pgd before
+ * destroying the VM), otherwise another faulting VCPU may come in and mess
+ * with things behind our backs.
+ */
+static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
{
pgd_t *pgd;
phys_addr_t addr = start, end = start + size;
phys_addr_t next;
- pgd = pgdp + kvm_pgd_index(addr);
+ pgd = kvm->arch.pgd + stage2_pgd_index(addr);
do {
- next = kvm_pgd_addr_end(addr, end);
- if (!pgd_none(*pgd))
- unmap_puds(kvm, pgd, addr, next);
+ next = stage2_pgd_addr_end(addr, end);
+ if (!stage2_pgd_none(*pgd))
+ unmap_stage2_puds(kvm, pgd, addr, next);
} while (pgd++, addr = next, addr != end);
}
@@ -322,11 +320,11 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
pmd_t *pmd;
phys_addr_t next;
- pmd = pmd_offset(pud, addr);
+ pmd = stage2_pmd_offset(pud, addr);
do {
- next = kvm_pmd_addr_end(addr, end);
+ next = stage2_pmd_addr_end(addr, end);
if (!pmd_none(*pmd)) {
- if (kvm_pmd_huge(*pmd))
+ if (pmd_thp_or_huge(*pmd))
kvm_flush_dcache_pmd(*pmd);
else
stage2_flush_ptes(kvm, pmd, addr, next);
@@ -340,11 +338,11 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
pud_t *pud;
phys_addr_t next;
- pud = pud_offset(pgd, addr);
+ pud = stage2_pud_offset(pgd, addr);
do {
- next = kvm_pud_addr_end(addr, end);
- if (!pud_none(*pud)) {
- if (pud_huge(*pud))
+ next = stage2_pud_addr_end(addr, end);
+ if (!stage2_pud_none(*pud)) {
+ if (stage2_pud_huge(*pud))
kvm_flush_dcache_pud(*pud);
else
stage2_flush_pmds(kvm, pud, addr, next);
@@ -360,9 +358,9 @@ static void stage2_flush_memslot(struct kvm *kvm,
phys_addr_t next;
pgd_t *pgd;
- pgd = kvm->arch.pgd + kvm_pgd_index(addr);
+ pgd = kvm->arch.pgd + stage2_pgd_index(addr);
do {
- next = kvm_pgd_addr_end(addr, end);
+ next = stage2_pgd_addr_end(addr, end);
stage2_flush_puds(kvm, pgd, addr, next);
} while (pgd++, addr = next, addr != end);
}
@@ -391,6 +389,100 @@ static void stage2_flush_vm(struct kvm *kvm)
srcu_read_unlock(&kvm->srcu, idx);
}
+static void clear_hyp_pgd_entry(pgd_t *pgd)
+{
+ pud_t *pud_table __maybe_unused = pud_offset(pgd, 0UL);
+ pgd_clear(pgd);
+ pud_free(NULL, pud_table);
+ put_page(virt_to_page(pgd));
+}
+
+static void clear_hyp_pud_entry(pud_t *pud)
+{
+ pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0);
+ VM_BUG_ON(pud_huge(*pud));
+ pud_clear(pud);
+ pmd_free(NULL, pmd_table);
+ put_page(virt_to_page(pud));
+}
+
+static void clear_hyp_pmd_entry(pmd_t *pmd)
+{
+ pte_t *pte_table = pte_offset_kernel(pmd, 0);
+ VM_BUG_ON(pmd_thp_or_huge(*pmd));
+ pmd_clear(pmd);
+ pte_free_kernel(NULL, pte_table);
+ put_page(virt_to_page(pmd));
+}
+
+static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
+{
+ pte_t *pte, *start_pte;
+
+ start_pte = pte = pte_offset_kernel(pmd, addr);
+ do {
+ if (!pte_none(*pte)) {
+ kvm_set_pte(pte, __pte(0));
+ put_page(virt_to_page(pte));
+ }
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+
+ if (hyp_pte_table_empty(start_pte))
+ clear_hyp_pmd_entry(pmd);
+}
+
+static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
+{
+ phys_addr_t next;
+ pmd_t *pmd, *start_pmd;
+
+ start_pmd = pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ /* Hyp doesn't use huge pmds */
+ if (!pmd_none(*pmd))
+ unmap_hyp_ptes(pmd, addr, next);
+ } while (pmd++, addr = next, addr != end);
+
+ if (hyp_pmd_table_empty(start_pmd))
+ clear_hyp_pud_entry(pud);
+}
+
+static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
+{
+ phys_addr_t next;
+ pud_t *pud, *start_pud;
+
+ start_pud = pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ /* Hyp doesn't use huge puds */
+ if (!pud_none(*pud))
+ unmap_hyp_pmds(pud, addr, next);
+ } while (pud++, addr = next, addr != end);
+
+ if (hyp_pud_table_empty(start_pud))
+ clear_hyp_pgd_entry(pgd);
+}
+
+static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
+{
+ pgd_t *pgd;
+ phys_addr_t addr = start, end = start + size;
+ phys_addr_t next;
+
+ /*
+ * We don't unmap anything from HYP, except at the hyp tear down.
+ * Hence, we don't have to invalidate the TLBs here.
+ */
+ pgd = pgdp + pgd_index(addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (!pgd_none(*pgd))
+ unmap_hyp_puds(pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
+}
+
/**
* free_boot_hyp_pgd - free HYP boot page tables
*
@@ -401,14 +493,14 @@ void free_boot_hyp_pgd(void)
mutex_lock(&kvm_hyp_pgd_mutex);
if (boot_hyp_pgd) {
- unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
- unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+ unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
+ unmap_hyp_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
boot_hyp_pgd = NULL;
}
if (hyp_pgd)
- unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+ unmap_hyp_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
mutex_unlock(&kvm_hyp_pgd_mutex);
}
@@ -433,9 +525,9 @@ void free_hyp_pgds(void)
if (hyp_pgd) {
for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
- unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+ unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
- unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+ unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
hyp_pgd = NULL;
@@ -645,20 +737,6 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
__phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
}
-/* Free the HW pgd, one page at a time */
-static void kvm_free_hwpgd(void *hwpgd)
-{
- free_pages_exact(hwpgd, kvm_get_hwpgd_size());
-}
-
-/* Allocate the HW PGD, making sure that each page gets its own refcount */
-static void *kvm_alloc_hwpgd(void)
-{
- unsigned int size = kvm_get_hwpgd_size();
-
- return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
-}
-
/**
* kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
* @kvm: The KVM struct pointer for the VM.
@@ -673,81 +751,22 @@ static void *kvm_alloc_hwpgd(void)
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
pgd_t *pgd;
- void *hwpgd;
if (kvm->arch.pgd != NULL) {
kvm_err("kvm_arch already initialized?\n");
return -EINVAL;
}
- hwpgd = kvm_alloc_hwpgd();
- if (!hwpgd)
+ /* Allocate the HW PGD, making sure that each page gets its own refcount */
+ pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO);
+ if (!pgd)
return -ENOMEM;
- /* When the kernel uses more levels of page tables than the
- * guest, we allocate a fake PGD and pre-populate it to point
- * to the next-level page table, which will be the real
- * initial page table pointed to by the VTTBR.
- *
- * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
- * the PMD and the kernel will use folded pud.
- * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
- * pages.
- */
- if (KVM_PREALLOC_LEVEL > 0) {
- int i;
-
- /*
- * Allocate fake pgd for the page table manipulation macros to
- * work. This is not used by the hardware and we have no
- * alignment requirement for this allocation.
- */
- pgd = kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
- GFP_KERNEL | __GFP_ZERO);
-
- if (!pgd) {
- kvm_free_hwpgd(hwpgd);
- return -ENOMEM;
- }
-
- /* Plug the HW PGD into the fake one. */
- for (i = 0; i < PTRS_PER_S2_PGD; i++) {
- if (KVM_PREALLOC_LEVEL == 1)
- pgd_populate(NULL, pgd + i,
- (pud_t *)hwpgd + i * PTRS_PER_PUD);
- else if (KVM_PREALLOC_LEVEL == 2)
- pud_populate(NULL, pud_offset(pgd, 0) + i,
- (pmd_t *)hwpgd + i * PTRS_PER_PMD);
- }
- } else {
- /*
- * Allocate actual first-level Stage-2 page table used by the
- * hardware for Stage-2 page table walks.
- */
- pgd = (pgd_t *)hwpgd;
- }
-
kvm_clean_pgd(pgd);
kvm->arch.pgd = pgd;
return 0;
}
-/**
- * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
- * @kvm: The VM pointer
- * @start: The intermediate physical base address of the range to unmap
- * @size: The size of the area to unmap
- *
- * Clear a range of stage-2 mappings, lowering the various ref-counts. Must
- * be called while holding mmu_lock (unless for freeing the stage2 pgd before
- * destroying the VM), otherwise another faulting VCPU may come in and mess
- * with things behind our backs.
- */
-static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
-{
- unmap_range(kvm, kvm->arch.pgd, start, size);
-}
-
static void stage2_unmap_memslot(struct kvm *kvm,
struct kvm_memory_slot *memslot)
{
@@ -830,10 +849,8 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
return;
unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
- kvm_free_hwpgd(kvm_get_hwpgd(kvm));
- if (KVM_PREALLOC_LEVEL > 0)
- kfree(kvm->arch.pgd);
-
+ /* Free the HW pgd, one page at a time */
+ free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE);
kvm->arch.pgd = NULL;
}
@@ -843,16 +860,16 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
pgd_t *pgd;
pud_t *pud;
- pgd = kvm->arch.pgd + kvm_pgd_index(addr);
- if (WARN_ON(pgd_none(*pgd))) {
+ pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+ if (WARN_ON(stage2_pgd_none(*pgd))) {
if (!cache)
return NULL;
pud = mmu_memory_cache_alloc(cache);
- pgd_populate(NULL, pgd, pud);
+ stage2_pgd_populate(pgd, pud);
get_page(virt_to_page(pgd));
}
- return pud_offset(pgd, addr);
+ return stage2_pud_offset(pgd, addr);
}
static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
@@ -862,15 +879,15 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
pmd_t *pmd;
pud = stage2_get_pud(kvm, cache, addr);
- if (pud_none(*pud)) {
+ if (stage2_pud_none(*pud)) {
if (!cache)
return NULL;
pmd = mmu_memory_cache_alloc(cache);
- pud_populate(NULL, pud, pmd);
+ stage2_pud_populate(pud, pmd);
get_page(virt_to_page(pud));
}
- return pmd_offset(pud, addr);
+ return stage2_pmd_offset(pud, addr);
}
static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
@@ -893,11 +910,14 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
old_pmd = *pmd;
- kvm_set_pmd(pmd, *new_pmd);
- if (pmd_present(old_pmd))
+ if (pmd_present(old_pmd)) {
+ pmd_clear(pmd);
kvm_tlb_flush_vmid_ipa(kvm, addr);
- else
+ } else {
get_page(virt_to_page(pmd));
+ }
+
+ kvm_set_pmd(pmd, *new_pmd);
return 0;
}
@@ -946,15 +966,38 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
/* Create 2nd stage page table mapping - Level 3 */
old_pte = *pte;
- kvm_set_pte(pte, *new_pte);
- if (pte_present(old_pte))
+ if (pte_present(old_pte)) {
+ kvm_set_pte(pte, __pte(0));
kvm_tlb_flush_vmid_ipa(kvm, addr);
- else
+ } else {
get_page(virt_to_page(pte));
+ }
+ kvm_set_pte(pte, *new_pte);
return 0;
}
+#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static int stage2_ptep_test_and_clear_young(pte_t *pte)
+{
+ if (pte_young(*pte)) {
+ *pte = pte_mkold(*pte);
+ return 1;
+ }
+ return 0;
+}
+#else
+static int stage2_ptep_test_and_clear_young(pte_t *pte)
+{
+ return __ptep_test_and_clear_young(pte);
+}
+#endif
+
+static int stage2_pmdp_test_and_clear_young(pmd_t *pmd)
+{
+ return stage2_ptep_test_and_clear_young((pte_t *)pmd);
+}
+
/**
* kvm_phys_addr_ioremap - map a device range to guest IPA
*
@@ -978,7 +1021,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
if (writable)
- kvm_set_s2pte_writable(&pte);
+ pte = kvm_s2pte_mkwrite(pte);
ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
KVM_NR_MEM_OBJS);
@@ -1078,12 +1121,12 @@ static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
pmd_t *pmd;
phys_addr_t next;
- pmd = pmd_offset(pud, addr);
+ pmd = stage2_pmd_offset(pud, addr);
do {
- next = kvm_pmd_addr_end(addr, end);
+ next = stage2_pmd_addr_end(addr, end);
if (!pmd_none(*pmd)) {
- if (kvm_pmd_huge(*pmd)) {
+ if (pmd_thp_or_huge(*pmd)) {
if (!kvm_s2pmd_readonly(pmd))
kvm_set_s2pmd_readonly(pmd);
} else {
@@ -1106,12 +1149,12 @@ static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
pud_t *pud;
phys_addr_t next;
- pud = pud_offset(pgd, addr);
+ pud = stage2_pud_offset(pgd, addr);
do {
- next = kvm_pud_addr_end(addr, end);
- if (!pud_none(*pud)) {
+ next = stage2_pud_addr_end(addr, end);
+ if (!stage2_pud_none(*pud)) {
/* TODO:PUD not supported, revisit later if supported */
- BUG_ON(kvm_pud_huge(*pud));
+ BUG_ON(stage2_pud_huge(*pud));
stage2_wp_pmds(pud, addr, next);
}
} while (pud++, addr = next, addr != end);
@@ -1128,7 +1171,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
pgd_t *pgd;
phys_addr_t next;
- pgd = kvm->arch.pgd + kvm_pgd_index(addr);
+ pgd = kvm->arch.pgd + stage2_pgd_index(addr);
do {
/*
* Release kvm_mmu_lock periodically if the memory region is
@@ -1140,8 +1183,8 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
if (need_resched() || spin_needbreak(&kvm->mmu_lock))
cond_resched_lock(&kvm->mmu_lock);
- next = kvm_pgd_addr_end(addr, end);
- if (pgd_present(*pgd))
+ next = stage2_pgd_addr_end(addr, end);
+ if (stage2_pgd_present(*pgd))
stage2_wp_puds(pgd, addr, next);
} while (pgd++, addr = next, addr != end);
}
@@ -1320,7 +1363,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
pmd_t new_pmd = pfn_pmd(pfn, mem_type);
new_pmd = pmd_mkhuge(new_pmd);
if (writable) {
- kvm_set_s2pmd_writable(&new_pmd);
+ new_pmd = kvm_s2pmd_mkwrite(new_pmd);
kvm_set_pfn_dirty(pfn);
}
coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
@@ -1329,7 +1372,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
pte_t new_pte = pfn_pte(pfn, mem_type);
if (writable) {
- kvm_set_s2pte_writable(&new_pte);
+ new_pte = kvm_s2pte_mkwrite(new_pte);
kvm_set_pfn_dirty(pfn);
mark_page_dirty(kvm, gfn);
}
@@ -1348,6 +1391,8 @@ out_unlock:
* Resolve the access fault by making the page young again.
* Note that because the faulting entry is guaranteed not to be
* cached in the TLB, we don't need to invalidate anything.
+ * Only the HW Access Flag updates are supported for Stage 2 (no DBM),
+ * so there is no need for atomic (pte|pmd)_mkyoung operations.
*/
static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
{
@@ -1364,7 +1409,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
if (!pmd || pmd_none(*pmd)) /* Nothing there */
goto out;
- if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */
+ if (pmd_thp_or_huge(*pmd)) { /* THP, HugeTLB */
*pmd = pmd_mkyoung(*pmd);
pfn = pmd_pfn(*pmd);
pfn_valid = true;
@@ -1588,25 +1633,14 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
if (!pmd || pmd_none(*pmd)) /* Nothing there */
return 0;
- if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */
- if (pmd_young(*pmd)) {
- *pmd = pmd_mkold(*pmd);
- return 1;
- }
-
- return 0;
- }
+ if (pmd_thp_or_huge(*pmd)) /* THP, HugeTLB */
+ return stage2_pmdp_test_and_clear_young(pmd);
pte = pte_offset_kernel(pmd, gpa);
if (pte_none(*pte))
return 0;
- if (pte_young(*pte)) {
- *pte = pte_mkold(*pte); /* Just a page... */
- return 1;
- }
-
- return 0;
+ return stage2_ptep_test_and_clear_young(pte);
}
static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
@@ -1618,7 +1652,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
if (!pmd || pmd_none(*pmd)) /* Nothing there */
return 0;
- if (kvm_pmd_huge(*pmd)) /* THP, HugeTLB */
+ if (pmd_thp_or_huge(*pmd)) /* THP, HugeTLB */
return pmd_young(*pmd);
pte = pte_offset_kernel(pmd, gpa);
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 3f29887..ffde15f 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -96,32 +96,37 @@
SCTLR_EL2_SA | SCTLR_EL2_I)
/* TCR_EL2 Registers bits */
-#define TCR_EL2_RES1 ((1 << 31) | (1 << 23))
-#define TCR_EL2_TBI (1 << 20)
-#define TCR_EL2_PS (7 << 16)
-#define TCR_EL2_PS_40B (2 << 16)
-#define TCR_EL2_TG0 (1 << 14)
-#define TCR_EL2_SH0 (3 << 12)
-#define TCR_EL2_ORGN0 (3 << 10)
-#define TCR_EL2_IRGN0 (3 << 8)
-#define TCR_EL2_T0SZ 0x3f
-#define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \
- TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
+#define TCR_EL2_RES1 ((1 << 31) | (1 << 23))
+#define TCR_EL2_TBI (1 << 20)
+#define TCR_EL2_PS_SHIFT 16
+#define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT)
+#define TCR_EL2_PS_40B (2 << TCR_EL2_PS_SHIFT)
+#define TCR_EL2_TG0_MASK TCR_TG0_MASK
+#define TCR_EL2_SH0_MASK TCR_SH0_MASK
+#define TCR_EL2_ORGN0_MASK TCR_ORGN0_MASK
+#define TCR_EL2_IRGN0_MASK TCR_IRGN0_MASK
+#define TCR_EL2_T0SZ_MASK 0x3f
+#define TCR_EL2_MASK (TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \
+ TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK)
/* VTCR_EL2 Registers bits */
#define VTCR_EL2_RES1 (1 << 31)
-#define VTCR_EL2_PS_MASK (7 << 16)
-#define VTCR_EL2_TG0_MASK (1 << 14)
-#define VTCR_EL2_TG0_4K (0 << 14)
-#define VTCR_EL2_TG0_64K (1 << 14)
-#define VTCR_EL2_SH0_MASK (3 << 12)
-#define VTCR_EL2_SH0_INNER (3 << 12)
-#define VTCR_EL2_ORGN0_MASK (3 << 10)
-#define VTCR_EL2_ORGN0_WBWA (1 << 10)
-#define VTCR_EL2_IRGN0_MASK (3 << 8)
-#define VTCR_EL2_IRGN0_WBWA (1 << 8)
-#define VTCR_EL2_SL0_MASK (3 << 6)
-#define VTCR_EL2_SL0_LVL1 (1 << 6)
+#define VTCR_EL2_HD (1 << 22)
+#define VTCR_EL2_HA (1 << 21)
+#define VTCR_EL2_PS_MASK TCR_EL2_PS_MASK
+#define VTCR_EL2_TG0_MASK TCR_TG0_MASK
+#define VTCR_EL2_TG0_4K TCR_TG0_4K
+#define VTCR_EL2_TG0_16K TCR_TG0_16K
+#define VTCR_EL2_TG0_64K TCR_TG0_64K
+#define VTCR_EL2_SH0_MASK TCR_SH0_MASK
+#define VTCR_EL2_SH0_INNER TCR_SH0_INNER
+#define VTCR_EL2_ORGN0_MASK TCR_ORGN0_MASK
+#define VTCR_EL2_ORGN0_WBWA TCR_ORGN0_WBWA
+#define VTCR_EL2_IRGN0_MASK TCR_IRGN0_MASK
+#define VTCR_EL2_IRGN0_WBWA TCR_IRGN0_WBWA
+#define VTCR_EL2_SL0_SHIFT 6
+#define VTCR_EL2_SL0_MASK (3 << VTCR_EL2_SL0_SHIFT)
+#define VTCR_EL2_SL0_LVL1 (1 << VTCR_EL2_SL0_SHIFT)
#define VTCR_EL2_T0SZ_MASK 0x3f
#define VTCR_EL2_T0SZ_40B 24
#define VTCR_EL2_VS_SHIFT 19
@@ -137,35 +142,45 @@
* (see hyp-init.S).
*
* Note that when using 4K pages, we concatenate two first level page tables
- * together.
+ * together. With 16K pages, we concatenate 16 first level page tables.
*
* The magic numbers used for VTTBR_X in this patch can be found in Tables
* D4-23 and D4-25 in ARM DDI 0487A.b.
*/
+
+#define VTCR_EL2_T0SZ_IPA VTCR_EL2_T0SZ_40B
+#define VTCR_EL2_COMMON_BITS (VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
+ VTCR_EL2_IRGN0_WBWA | VTCR_EL2_RES1)
+
#ifdef CONFIG_ARM64_64K_PAGES
/*
* Stage2 translation configuration:
- * 40bits input (T0SZ = 24)
* 64kB pages (TG0 = 1)
* 2 level page tables (SL = 1)
*/
-#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
- VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
- VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1)
-#define VTTBR_X (38 - VTCR_EL2_T0SZ_40B)
-#else
+#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SL0_LVL1)
+#define VTTBR_X_TGRAN_MAGIC 38
+#elif defined(CONFIG_ARM64_16K_PAGES)
+/*
+ * Stage2 translation configuration:
+ * 16kB pages (TG0 = 2)
+ * 2 level page tables (SL = 1)
+ */
+#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_16K | VTCR_EL2_SL0_LVL1)
+#define VTTBR_X_TGRAN_MAGIC 42
+#else /* 4K */
/*
* Stage2 translation configuration:
- * 40bits input (T0SZ = 24)
* 4kB pages (TG0 = 0)
* 3 level page tables (SL = 1)
*/
-#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \
- VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
- VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1)
-#define VTTBR_X (37 - VTCR_EL2_T0SZ_40B)
+#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SL0_LVL1)
+#define VTTBR_X_TGRAN_MAGIC 37
#endif
+#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS)
+#define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA)
+
#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
#define VTTBR_VMID_SHIFT (UL(48))
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 22732a5..844fe5d 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -45,18 +45,6 @@
*/
#define TRAMPOLINE_VA (HYP_PAGE_OFFSET_MASK & PAGE_MASK)
-/*
- * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation
- * levels in addition to the PGD and potentially the PUD which are
- * pre-allocated (we pre-allocate the fake PGD and the PUD when the Stage-2
- * tables use one level of tables less than the kernel.
- */
-#ifdef CONFIG_ARM64_64K_PAGES
-#define KVM_MMU_CACHE_MIN_PAGES 1
-#else
-#define KVM_MMU_CACHE_MIN_PAGES 2
-#endif
-
#ifdef __ASSEMBLY__
#include <asm/alternative.h>
@@ -91,6 +79,8 @@ alternative_endif
#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT)
#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL)
+#include <asm/stage2_pgtable.h>
+
int create_hyp_mappings(void *from, void *to);
int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
void free_boot_hyp_pgd(void);
@@ -121,19 +111,32 @@ static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
static inline void kvm_clean_pte(pte_t *pte) {}
static inline void kvm_clean_pte_entry(pte_t *pte) {}
-static inline void kvm_set_s2pte_writable(pte_t *pte)
+static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
{
- pte_val(*pte) |= PTE_S2_RDWR;
+ pte_val(pte) |= PTE_S2_RDWR;
+ return pte;
}
-static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
+static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
{
- pmd_val(*pmd) |= PMD_S2_RDWR;
+ pmd_val(pmd) |= PMD_S2_RDWR;
+ return pmd;
}
static inline void kvm_set_s2pte_readonly(pte_t *pte)
{
- pte_val(*pte) = (pte_val(*pte) & ~PTE_S2_RDWR) | PTE_S2_RDONLY;
+ pteval_t pteval;
+ unsigned long tmp;
+
+ asm volatile("// kvm_set_s2pte_readonly\n"
+ " prfm pstl1strm, %2\n"
+ "1: ldxr %0, %2\n"
+ " and %0, %0, %3 // clear PTE_S2_RDWR\n"
+ " orr %0, %0, %4 // set PTE_S2_RDONLY\n"
+ " stxr %w1, %0, %2\n"
+ " cbnz %w1, 1b\n"
+ : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*pte))
+ : "L" (~PTE_S2_RDWR), "L" (PTE_S2_RDONLY));
}
static inline bool kvm_s2pte_readonly(pte_t *pte)
@@ -143,69 +146,12 @@ static inline bool kvm_s2pte_readonly(pte_t *pte)
static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
{
- pmd_val(*pmd) = (pmd_val(*pmd) & ~PMD_S2_RDWR) | PMD_S2_RDONLY;
+ kvm_set_s2pte_readonly((pte_t *)pmd);
}
static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
{
- return (pmd_val(*pmd) & PMD_S2_RDWR) == PMD_S2_RDONLY;
-}
-
-
-#define kvm_pgd_addr_end(addr, end) pgd_addr_end(addr, end)
-#define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end)
-#define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end)
-
-/*
- * In the case where PGDIR_SHIFT is larger than KVM_PHYS_SHIFT, we can address
- * the entire IPA input range with a single pgd entry, and we would only need
- * one pgd entry. Note that in this case, the pgd is actually not used by
- * the MMU for Stage-2 translations, but is merely a fake pgd used as a data
- * structure for the kernel pgtable macros to work.
- */
-#if PGDIR_SHIFT > KVM_PHYS_SHIFT
-#define PTRS_PER_S2_PGD_SHIFT 0
-#else
-#define PTRS_PER_S2_PGD_SHIFT (KVM_PHYS_SHIFT - PGDIR_SHIFT)
-#endif
-#define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT)
-
-#define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
-
-/*
- * If we are concatenating first level stage-2 page tables, we would have less
- * than or equal to 16 pointers in the fake PGD, because that's what the
- * architecture allows. In this case, (4 - CONFIG_PGTABLE_LEVELS)
- * represents the first level for the host, and we add 1 to go to the next
- * level (which uses contatenation) for the stage-2 tables.
- */
-#if PTRS_PER_S2_PGD <= 16
-#define KVM_PREALLOC_LEVEL (4 - CONFIG_PGTABLE_LEVELS + 1)
-#else
-#define KVM_PREALLOC_LEVEL (0)
-#endif
-
-static inline void *kvm_get_hwpgd(struct kvm *kvm)
-{
- pgd_t *pgd = kvm->arch.pgd;
- pud_t *pud;
-
- if (KVM_PREALLOC_LEVEL == 0)
- return pgd;
-
- pud = pud_offset(pgd, 0);
- if (KVM_PREALLOC_LEVEL == 1)
- return pud;
-
- BUG_ON(KVM_PREALLOC_LEVEL != 2);
- return pmd_offset(pud, 0);
-}
-
-static inline unsigned int kvm_get_hwpgd_size(void)
-{
- if (KVM_PREALLOC_LEVEL > 0)
- return PTRS_PER_S2_PGD * PAGE_SIZE;
- return PTRS_PER_S2_PGD * sizeof(pgd_t);
+ return kvm_s2pte_readonly((pte_t *)pmd);
}
static inline bool kvm_page_empty(void *ptr)
@@ -214,23 +160,20 @@ static inline bool kvm_page_empty(void *ptr)
return page_count(ptr_page) == 1;
}
-#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
+#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
#ifdef __PAGETABLE_PMD_FOLDED
-#define kvm_pmd_table_empty(kvm, pmdp) (0)
+#define hyp_pmd_table_empty(pmdp) (0)
#else
-#define kvm_pmd_table_empty(kvm, pmdp) \
- (kvm_page_empty(pmdp) && (!(kvm) || KVM_PREALLOC_LEVEL < 2))
+#define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
#endif
#ifdef __PAGETABLE_PUD_FOLDED
-#define kvm_pud_table_empty(kvm, pudp) (0)
+#define hyp_pud_table_empty(pudp) (0)
#else
-#define kvm_pud_table_empty(kvm, pudp) \
- (kvm_page_empty(pudp) && (!(kvm) || KVM_PREALLOC_LEVEL < 1))
+#define hyp_pud_table_empty(pudp) kvm_page_empty(pudp)
#endif
-
struct kvm;
#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l))
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 5c25b83..936f173 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -208,23 +208,69 @@
#define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_T1SZ_OFFSET)
#define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x))
#define TCR_TxSZ_WIDTH 6
-#define TCR_IRGN_NC ((UL(0) << 8) | (UL(0) << 24))
-#define TCR_IRGN_WBWA ((UL(1) << 8) | (UL(1) << 24))
-#define TCR_IRGN_WT ((UL(2) << 8) | (UL(2) << 24))
-#define TCR_IRGN_WBnWA ((UL(3) << 8) | (UL(3) << 24))
-#define TCR_IRGN_MASK ((UL(3) << 8) | (UL(3) << 24))
-#define TCR_ORGN_NC ((UL(0) << 10) | (UL(0) << 26))
-#define TCR_ORGN_WBWA ((UL(1) << 10) | (UL(1) << 26))
-#define TCR_ORGN_WT ((UL(2) << 10) | (UL(2) << 26))
-#define TCR_ORGN_WBnWA ((UL(3) << 10) | (UL(3) << 26))
-#define TCR_ORGN_MASK ((UL(3) << 10) | (UL(3) << 26))
-#define TCR_SHARED ((UL(3) << 12) | (UL(3) << 28))
-#define TCR_TG0_4K (UL(0) << 14)
-#define TCR_TG0_64K (UL(1) << 14)
-#define TCR_TG0_16K (UL(2) << 14)
-#define TCR_TG1_16K (UL(1) << 30)
-#define TCR_TG1_4K (UL(2) << 30)
-#define TCR_TG1_64K (UL(3) << 30)
+
+#define TCR_IRGN0_SHIFT 8
+#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT)
+
+#define TCR_IRGN1_SHIFT 24
+#define TCR_IRGN1_MASK (UL(3) << TCR_IRGN1_SHIFT)
+#define TCR_IRGN1_NC (UL(0) << TCR_IRGN1_SHIFT)
+#define TCR_IRGN1_WBWA (UL(1) << TCR_IRGN1_SHIFT)
+#define TCR_IRGN1_WT (UL(2) << TCR_IRGN1_SHIFT)
+#define TCR_IRGN1_WBnWA (UL(3) << TCR_IRGN1_SHIFT)
+
+#define TCR_IRGN_NC (TCR_IRGN0_NC | TCR_IRGN1_NC)
+#define TCR_IRGN_WBWA (TCR_IRGN0_WBWA | TCR_IRGN1_WBWA)
+#define TCR_IRGN_WT (TCR_IRGN0_WT | TCR_IRGN1_WT)
+#define TCR_IRGN_WBnWA (TCR_IRGN0_WBnWA | TCR_IRGN1_WBnWA)
+#define TCR_IRGN_MASK (TCR_IRGN0_MASK | TCR_IRGN1_MASK)
+
+
+#define TCR_ORGN0_SHIFT 10
+#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT)
+
+#define TCR_ORGN1_SHIFT 26
+#define TCR_ORGN1_MASK (UL(3) << TCR_ORGN1_SHIFT)
+#define TCR_ORGN1_NC (UL(0) << TCR_ORGN1_SHIFT)
+#define TCR_ORGN1_WBWA (UL(1) << TCR_ORGN1_SHIFT)
+#define TCR_ORGN1_WT (UL(2) << TCR_ORGN1_SHIFT)
+#define TCR_ORGN1_WBnWA (UL(3) << TCR_ORGN1_SHIFT)
+
+#define TCR_ORGN_NC (TCR_ORGN0_NC | TCR_ORGN1_NC)
+#define TCR_ORGN_WBWA (TCR_ORGN0_WBWA | TCR_ORGN1_WBWA)
+#define TCR_ORGN_WT (TCR_ORGN0_WT | TCR_ORGN1_WT)
+#define TCR_ORGN_WBnWA (TCR_ORGN0_WBnWA | TCR_ORGN1_WBnWA)
+#define TCR_ORGN_MASK (TCR_ORGN0_MASK | TCR_ORGN1_MASK)
+
+#define TCR_SH0_SHIFT 12
+#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT)
+#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT)
+
+#define TCR_SH1_SHIFT 28
+#define TCR_SH1_MASK (UL(3) << TCR_SH1_SHIFT)
+#define TCR_SH1_INNER (UL(3) << TCR_SH1_SHIFT)
+#define TCR_SHARED (TCR_SH0_INNER | TCR_SH1_INNER)
+
+#define TCR_TG0_SHIFT 14
+#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT)
+#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT)
+#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT)
+#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT)
+
+#define TCR_TG1_SHIFT 30
+#define TCR_TG1_MASK (UL(3) << TCR_TG1_SHIFT)
+#define TCR_TG1_16K (UL(1) << TCR_TG1_SHIFT)
+#define TCR_TG1_4K (UL(2) << TCR_TG1_SHIFT)
+#define TCR_TG1_64K (UL(3) << TCR_TG1_SHIFT)
+
#define TCR_ASID16 (UL(1) << 36)
#define TCR_TBI0 (UL(1) << 37)
#define TCR_HA (UL(1) << 39)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 989fef1..f1d5afd 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -290,6 +290,8 @@ static inline pgprot_t mk_sect_prot(pgprot_t prot)
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK))
+#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd))
+
#define __HAVE_ARCH_PMD_WRITE
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
@@ -530,14 +532,12 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
* Atomic pte/pmd modifications.
*/
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long address,
- pte_t *ptep)
+static inline int __ptep_test_and_clear_young(pte_t *ptep)
{
pteval_t pteval;
unsigned int tmp, res;
- asm volatile("// ptep_test_and_clear_young\n"
+ asm volatile("// __ptep_test_and_clear_young\n"
" prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
" ubfx %w3, %w0, %5, #1 // extract PTE_AF (young)\n"
@@ -550,6 +550,13 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
return res;
}
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address,
+ pte_t *ptep)
+{
+ return __ptep_test_and_clear_young(ptep);
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
diff --git a/arch/arm64/include/asm/stage2_pgtable-nopmd.h b/arch/arm64/include/asm/stage2_pgtable-nopmd.h
new file mode 100644
index 0000000..2656a0f
--- /dev/null
+++ b/arch/arm64/include/asm/stage2_pgtable-nopmd.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2016 - ARM Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_S2_PGTABLE_NOPMD_H_
+#define __ARM64_S2_PGTABLE_NOPMD_H_
+
+#include <asm/stage2_pgtable-nopud.h>
+
+#define __S2_PGTABLE_PMD_FOLDED
+
+#define S2_PMD_SHIFT S2_PUD_SHIFT
+#define S2_PTRS_PER_PMD 1
+#define S2_PMD_SIZE (1UL << S2_PMD_SHIFT)
+#define S2_PMD_MASK (~(S2_PMD_SIZE-1))
+
+#define stage2_pud_none(pud) (0)
+#define stage2_pud_present(pud) (1)
+#define stage2_pud_clear(pud) do { } while (0)
+#define stage2_pud_populate(pud, pmd) do { } while (0)
+#define stage2_pmd_offset(pud, address) ((pmd_t *)(pud))
+
+#define stage2_pmd_free(pmd) do { } while (0)
+
+#define stage2_pmd_addr_end(addr, end) (end)
+
+#define stage2_pud_huge(pud) (0)
+#define stage2_pmd_table_empty(pmdp) (0)
+
+#endif
diff --git a/arch/arm64/include/asm/stage2_pgtable-nopud.h b/arch/arm64/include/asm/stage2_pgtable-nopud.h
new file mode 100644
index 0000000..5ee87b5
--- /dev/null
+++ b/arch/arm64/include/asm/stage2_pgtable-nopud.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2016 - ARM Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_S2_PGTABLE_NOPUD_H_
+#define __ARM64_S2_PGTABLE_NOPUD_H_
+
+#define __S2_PGTABLE_PUD_FOLDED
+
+#define S2_PUD_SHIFT S2_PGDIR_SHIFT
+#define S2_PTRS_PER_PUD 1
+#define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT)
+#define S2_PUD_MASK (~(S2_PUD_SIZE-1))
+
+#define stage2_pgd_none(pgd) (0)
+#define stage2_pgd_present(pgd) (1)
+#define stage2_pgd_clear(pgd) do { } while (0)
+#define stage2_pgd_populate(pgd, pud) do { } while (0)
+
+#define stage2_pud_offset(pgd, address) ((pud_t *)(pgd))
+
+#define stage2_pud_free(x) do { } while (0)
+
+#define stage2_pud_addr_end(addr, end) (end)
+#define stage2_pud_table_empty(pmdp) (0)
+
+#endif
diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h
new file mode 100644
index 0000000..8b68099
--- /dev/null
+++ b/arch/arm64/include/asm/stage2_pgtable.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2016 - ARM Ltd
+ *
+ * stage2 page table helpers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_S2_PGTABLE_H_
+#define __ARM64_S2_PGTABLE_H_
+
+#include <asm/pgtable.h>
+
+/*
+ * The hardware supports concatenation of up to 16 tables at stage2 entry level
+ * and we use the feature whenever possible.
+ *
+ * Now, the minimum number of bits resolved at any level is (PAGE_SHIFT - 3).
+ * On arm64, the smallest PAGE_SIZE supported is 4k, which means
+ * (PAGE_SHIFT - 3) > 4 holds for all page sizes.
+ * This implies that the total number of page table levels expected by the
+ * hardware at stage2 is the number of levels required for (KVM_PHYS_SHIFT - 4)
+ * in normal translations (e.g., stage1), since we cannot have another level in
+ * the range (KVM_PHYS_SHIFT, KVM_PHYS_SHIFT - 4).
+ */
+#define STAGE2_PGTABLE_LEVELS ARM64_HW_PGTABLE_LEVELS(KVM_PHYS_SHIFT - 4)
+
+/*
+ * With all the supported VA_BITs and 40bit guest IPA, the following condition
+ * is always true:
+ *
+ * STAGE2_PGTABLE_LEVELS <= CONFIG_PGTABLE_LEVELS
+ *
+ * We base our stage-2 page table walker helpers on this assumption and
+ * fall back to using the host version of the helper wherever possible.
+ * i.e., if a particular level is not folded (e.g., PUD) at stage2, we fall back
+ * to using the host version, since it is guaranteed not to be folded at host.
+ *
+ * If the condition breaks in the future, we can rearrange the host level
+ * definitions and reuse them for stage2. Till then...
+ */
+#if STAGE2_PGTABLE_LEVELS > CONFIG_PGTABLE_LEVELS
+#error "Unsupported combination of guest IPA and host VA_BITS."
+#endif
+
+/* S2_PGDIR_SHIFT is the log2 of the size mapped by a top-level stage2 entry */
+#define S2_PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - STAGE2_PGTABLE_LEVELS)
+#define S2_PGDIR_SIZE (_AC(1, UL) << S2_PGDIR_SHIFT)
+#define S2_PGDIR_MASK (~(S2_PGDIR_SIZE - 1))
+
+/*
+ * The number of PTRS across all concatenated stage2 tables is given by the
+ * number of bits resolved at the initial level.
+ */
+#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - S2_PGDIR_SHIFT))
+
+/*
+ * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation
+ * levels in addition to the PGD.
+ */
+#define KVM_MMU_CACHE_MIN_PAGES (STAGE2_PGTABLE_LEVELS - 1)
+
+
+#if STAGE2_PGTABLE_LEVELS > 3
+
+#define S2_PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
+#define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT)
+#define S2_PUD_MASK (~(S2_PUD_SIZE - 1))
+
+#define stage2_pgd_none(pgd) pgd_none(pgd)
+#define stage2_pgd_clear(pgd) pgd_clear(pgd)
+#define stage2_pgd_present(pgd) pgd_present(pgd)
+#define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud)
+#define stage2_pud_offset(pgd, address) pud_offset(pgd, address)
+#define stage2_pud_free(pud) pud_free(NULL, pud)
+
+#define stage2_pud_table_empty(pudp) kvm_page_empty(pudp)
+
+static inline phys_addr_t stage2_pud_addr_end(phys_addr_t addr, phys_addr_t end)
+{
+ phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK;
+
+ return (boundary - 1 < end - 1) ? boundary : end;
+}
+
+#endif /* STAGE2_PGTABLE_LEVELS > 3 */
+
+
+#if STAGE2_PGTABLE_LEVELS > 2
+
+#define S2_PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
+#define S2_PMD_SIZE (_AC(1, UL) << S2_PMD_SHIFT)
+#define S2_PMD_MASK (~(S2_PMD_SIZE - 1))
+
+#define stage2_pud_none(pud) pud_none(pud)
+#define stage2_pud_clear(pud) pud_clear(pud)
+#define stage2_pud_present(pud) pud_present(pud)
+#define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd)
+#define stage2_pmd_offset(pud, address) pmd_offset(pud, address)
+#define stage2_pmd_free(pmd) pmd_free(NULL, pmd)
+
+#define stage2_pud_huge(pud) pud_huge(pud)
+#define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+
+static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end)
+{
+ phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK;
+
+ return (boundary - 1 < end - 1) ? boundary : end;
+}
+
+#endif /* STAGE2_PGTABLE_LEVELS > 2 */
+
+#define stage2_pte_table_empty(ptep) kvm_page_empty(ptep)
+
+#if STAGE2_PGTABLE_LEVELS == 2
+#include <asm/stage2_pgtable-nopmd.h>
+#elif STAGE2_PGTABLE_LEVELS == 3
+#include <asm/stage2_pgtable-nopud.h>
+#endif
+
+
+#define stage2_pgd_index(addr) (((addr) >> S2_PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
+
+static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end)
+{
+ phys_addr_t boundary = (addr + S2_PGDIR_SIZE) & S2_PGDIR_MASK;
+
+ return (boundary - 1 < end - 1) ? boundary : end;
+}
+
+#endif /* __ARM64_S2_PGTABLE_H_ */
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index de7450d..aa2e34e 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -22,7 +22,6 @@ config KVM_ARM_VGIC_V3
config KVM
bool "Kernel-based Virtual Machine (KVM) support"
depends on OF
- depends on !ARM64_16K_PAGES
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select ANON_INODES
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
index bcbe761..b81f409 100644
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -66,6 +66,14 @@ u32 __hyp_text __init_stage2_translation(void)
val |= 64 - (parange > 40 ? 40 : parange);
/*
+ * Check the availability of Hardware Access Flag / Dirty Bit
+ * Management in ID_AA64MMFR1_EL1 and enable the Access Flag update (HA) in VTCR_EL2.
+ */
+ tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf;
+ if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && tmp)
+ val |= VTCR_EL2_HA;
+
+ /*
* Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS
* bit in VTCR_EL2.
*/
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 5152b38..4814446 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -468,11 +468,11 @@ static struct cyclecounter cyclecounter = {
.mask = CLOCKSOURCE_MASK(56),
};
-static struct timecounter timecounter;
+static struct arch_timer_kvm_info arch_timer_kvm_info;
-struct timecounter *arch_timer_get_timecounter(void)
+struct arch_timer_kvm_info *arch_timer_get_kvm_info(void)
{
- return &timecounter;
+ return &arch_timer_kvm_info;
}
static void __init arch_counter_register(unsigned type)
@@ -500,7 +500,8 @@ static void __init arch_counter_register(unsigned type)
clocksource_register_hz(&clocksource_counter, arch_timer_rate);
cyclecounter.mult = clocksource_counter.mult;
cyclecounter.shift = clocksource_counter.shift;
- timecounter_init(&timecounter, &cyclecounter, start_count);
+ timecounter_init(&arch_timer_kvm_info.timecounter,
+ &cyclecounter, start_count);
/* 56 bits minimum, so we assume worst case rollover */
sched_clock_register(arch_timer_read_counter, 56, arch_timer_rate);
@@ -744,6 +745,8 @@ static void __init arch_timer_init(void)
arch_timer_register();
arch_timer_common_init();
+
+ arch_timer_kvm_info.virtual_irq = arch_timer_ppi[VIRT_PPI];
}
static void __init arch_timer_of_init(struct device_node *np)
diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
index f174ce0..2e9443b 100644
--- a/drivers/irqchip/irq-gic-common.c
+++ b/drivers/irqchip/irq-gic-common.c
@@ -21,6 +21,19 @@
#include "irq-gic-common.h"
+static const struct gic_kvm_info *gic_kvm_info;
+
+const struct gic_kvm_info *gic_get_kvm_info(void)
+{
+ return gic_kvm_info;
+}
+
+void gic_set_kvm_info(const struct gic_kvm_info *info)
+{
+ BUG_ON(gic_kvm_info != NULL);
+ gic_kvm_info = info;
+}
+
void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
void *data)
{
diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index fff697d..205e5fd 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -19,6 +19,7 @@
#include <linux/of.h>
#include <linux/irqdomain.h>
+#include <linux/irqchip/arm-gic-common.h>
struct gic_quirk {
const char *desc;
@@ -35,4 +36,6 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void));
void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
void *data);
+void gic_set_kvm_info(const struct gic_kvm_info *info);
+
#endif /* _IRQ_GIC_COMMON_H */
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 5b7d3c2..05a8560 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -15,6 +15,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#define pr_fmt(fmt) "GICv3: " fmt
+
#include <linux/acpi.h>
#include <linux/cpu.h>
#include <linux/cpu_pm.h>
@@ -28,6 +30,7 @@
#include <linux/slab.h>
#include <linux/irqchip.h>
+#include <linux/irqchip/arm-gic-common.h>
#include <linux/irqchip/arm-gic-v3.h>
#include <asm/cputype.h>
@@ -56,6 +59,8 @@ struct gic_chip_data {
static struct gic_chip_data gic_data __read_mostly;
static struct static_key supports_deactivate = STATIC_KEY_INIT_TRUE;
+static struct gic_kvm_info gic_v3_kvm_info;
+
#define gic_data_rdist() (this_cpu_ptr(gic_data.rdists.rdist))
#define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base)
#define gic_data_rdist_sgi_base() (gic_data_rdist_rd_base() + SZ_64K)
@@ -901,6 +906,30 @@ static int __init gic_validate_dist_version(void __iomem *dist_base)
return 0;
}
+static void __init gic_of_setup_kvm_info(struct device_node *node)
+{
+ int ret;
+ struct resource r;
+ u32 gicv_idx;
+
+ gic_v3_kvm_info.type = GIC_V3;
+
+ gic_v3_kvm_info.maint_irq = irq_of_parse_and_map(node, 0);
+ if (!gic_v3_kvm_info.maint_irq)
+ return;
+
+ if (of_property_read_u32(node, "#redistributor-regions",
+ &gicv_idx))
+ gicv_idx = 1;
+
+ gicv_idx += 3; /* Also skip GICD, GICC, GICH */
+ ret = of_address_to_resource(node, gicv_idx, &r);
+ if (!ret)
+ gic_v3_kvm_info.vcpu = r;
+
+ gic_set_kvm_info(&gic_v3_kvm_info);
+}
+
static int __init gic_of_init(struct device_node *node, struct device_node *parent)
{
void __iomem *dist_base;
@@ -952,8 +981,10 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
err = gic_init_bases(dist_base, rdist_regs, nr_redist_regions,
redist_stride, &node->fwnode);
- if (!err)
+ if (!err) {
+ gic_of_setup_kvm_info(node);
return 0;
+ }
out_unmap_rdist:
for (i = 0; i < nr_redist_regions; i++)
@@ -968,19 +999,25 @@ out_unmap_dist:
IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gic_of_init);
#ifdef CONFIG_ACPI
-static void __iomem *dist_base;
-static struct redist_region *redist_regs __initdata;
-static u32 nr_redist_regions __initdata;
-static bool single_redist;
+static struct
+{
+ void __iomem *dist_base;
+ struct redist_region *redist_regs;
+ u32 nr_redist_regions;
+ bool single_redist;
+ u32 maint_irq;
+ int maint_irq_mode;
+ phys_addr_t vcpu_base;
+} acpi_data __initdata;
static void __init
gic_acpi_register_redist(phys_addr_t phys_base, void __iomem *redist_base)
{
static int count = 0;
- redist_regs[count].phys_base = phys_base;
- redist_regs[count].redist_base = redist_base;
- redist_regs[count].single_redist = single_redist;
+ acpi_data.redist_regs[count].phys_base = phys_base;
+ acpi_data.redist_regs[count].redist_base = redist_base;
+ acpi_data.redist_regs[count].single_redist = acpi_data.single_redist;
count++;
}
@@ -1008,7 +1045,7 @@ gic_acpi_parse_madt_gicc(struct acpi_subtable_header *header,
{
struct acpi_madt_generic_interrupt *gicc =
(struct acpi_madt_generic_interrupt *)header;
- u32 reg = readl_relaxed(dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK;
+ u32 reg = readl_relaxed(acpi_data.dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK;
u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2;
void __iomem *redist_base;
@@ -1025,7 +1062,7 @@ static int __init gic_acpi_collect_gicr_base(void)
acpi_tbl_entry_handler redist_parser;
enum acpi_madt_type type;
- if (single_redist) {
+ if (acpi_data.single_redist) {
type = ACPI_MADT_TYPE_GENERIC_INTERRUPT;
redist_parser = gic_acpi_parse_madt_gicc;
} else {
@@ -1076,14 +1113,14 @@ static int __init gic_acpi_count_gicr_regions(void)
count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR,
gic_acpi_match_gicr, 0);
if (count > 0) {
- single_redist = false;
+ acpi_data.single_redist = false;
return count;
}
count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
gic_acpi_match_gicc, 0);
if (count > 0)
- single_redist = true;
+ acpi_data.single_redist = true;
return count;
}
@@ -1103,36 +1140,117 @@ static bool __init acpi_validate_gic_table(struct acpi_subtable_header *header,
if (count <= 0)
return false;
- nr_redist_regions = count;
+ acpi_data.nr_redist_regions = count;
return true;
}
+static int __init gic_acpi_parse_virt_madt_gicc(struct acpi_subtable_header *header,
+ const unsigned long end)
+{
+ struct acpi_madt_generic_interrupt *gicc =
+ (struct acpi_madt_generic_interrupt *)header;
+ int maint_irq_mode;
+ static int first_madt = true;
+
+ /* Skip unusable CPUs */
+ if (!(gicc->flags & ACPI_MADT_ENABLED))
+ return 0;
+
+ maint_irq_mode = (gicc->flags & ACPI_MADT_VGIC_IRQ_MODE) ?
+ ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE;
+
+ if (first_madt) {
+ first_madt = false;
+
+ acpi_data.maint_irq = gicc->vgic_interrupt;
+ acpi_data.maint_irq_mode = maint_irq_mode;
+ acpi_data.vcpu_base = gicc->gicv_base_address;
+
+ return 0;
+ }
+
+ /*
+ * The maintenance interrupt and GICV should be the same for every CPU
+ */
+ if ((acpi_data.maint_irq != gicc->vgic_interrupt) ||
+ (acpi_data.maint_irq_mode != maint_irq_mode) ||
+ (acpi_data.vcpu_base != gicc->gicv_base_address))
+ return -EINVAL;
+
+ return 0;
+}
+
+static bool __init gic_acpi_collect_virt_info(void)
+{
+ int count;
+
+ count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
+ gic_acpi_parse_virt_madt_gicc, 0);
+
+ return (count > 0);
+}
+
#define ACPI_GICV3_DIST_MEM_SIZE (SZ_64K)
+#define ACPI_GICV2_VCTRL_MEM_SIZE (SZ_4K)
+#define ACPI_GICV2_VCPU_MEM_SIZE (SZ_8K)
+
+static void __init gic_acpi_setup_kvm_info(void)
+{
+ int irq;
+
+ if (!gic_acpi_collect_virt_info()) {
+ pr_warn("Unable to get hardware information used for virtualization\n");
+ return;
+ }
+
+ gic_v3_kvm_info.type = GIC_V3;
+
+ irq = acpi_register_gsi(NULL, acpi_data.maint_irq,
+ acpi_data.maint_irq_mode,
+ ACPI_ACTIVE_HIGH);
+ if (irq <= 0)
+ return;
+
+ gic_v3_kvm_info.maint_irq = irq;
+
+ if (acpi_data.vcpu_base) {
+ struct resource *vcpu = &gic_v3_kvm_info.vcpu;
+
+ vcpu->flags = IORESOURCE_MEM;
+ vcpu->start = acpi_data.vcpu_base;
+ vcpu->end = vcpu->start + ACPI_GICV2_VCPU_MEM_SIZE - 1;
+ }
+
+ gic_set_kvm_info(&gic_v3_kvm_info);
+}
static int __init
gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end)
{
struct acpi_madt_generic_distributor *dist;
struct fwnode_handle *domain_handle;
+ size_t size;
int i, err;
/* Get distributor base address */
dist = (struct acpi_madt_generic_distributor *)header;
- dist_base = ioremap(dist->base_address, ACPI_GICV3_DIST_MEM_SIZE);
- if (!dist_base) {
+ acpi_data.dist_base = ioremap(dist->base_address,
+ ACPI_GICV3_DIST_MEM_SIZE);
+ if (!acpi_data.dist_base) {
pr_err("Unable to map GICD registers\n");
return -ENOMEM;
}
- err = gic_validate_dist_version(dist_base);
+ err = gic_validate_dist_version(acpi_data.dist_base);
if (err) {
- pr_err("No distributor detected at @%p, giving up", dist_base);
+ pr_err("No distributor detected at @%p, giving up",
+ acpi_data.dist_base);
goto out_dist_unmap;
}
- redist_regs = kzalloc(sizeof(*redist_regs) * nr_redist_regions,
- GFP_KERNEL);
- if (!redist_regs) {
+ size = sizeof(*acpi_data.redist_regs) * acpi_data.nr_redist_regions;
+ acpi_data.redist_regs = kzalloc(size, GFP_KERNEL);
+ if (!acpi_data.redist_regs) {
err = -ENOMEM;
goto out_dist_unmap;
}
@@ -1141,29 +1259,31 @@ gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end)
if (err)
goto out_redist_unmap;
- domain_handle = irq_domain_alloc_fwnode(dist_base);
+ domain_handle = irq_domain_alloc_fwnode(acpi_data.dist_base);
if (!domain_handle) {
err = -ENOMEM;
goto out_redist_unmap;
}
- err = gic_init_bases(dist_base, redist_regs, nr_redist_regions, 0,
- domain_handle);
+ err = gic_init_bases(acpi_data.dist_base, acpi_data.redist_regs,
+ acpi_data.nr_redist_regions, 0, domain_handle);
if (err)
goto out_fwhandle_free;
acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle);
+ gic_acpi_setup_kvm_info();
+
return 0;
out_fwhandle_free:
irq_domain_free_fwnode(domain_handle);
out_redist_unmap:
- for (i = 0; i < nr_redist_regions; i++)
- if (redist_regs[i].redist_base)
- iounmap(redist_regs[i].redist_base);
- kfree(redist_regs);
+ for (i = 0; i < acpi_data.nr_redist_regions; i++)
+ if (acpi_data.redist_regs[i].redist_base)
+ iounmap(acpi_data.redist_regs[i].redist_base);
+ kfree(acpi_data.redist_regs);
out_dist_unmap:
- iounmap(dist_base);
+ iounmap(acpi_data.dist_base);
return err;
}
IRQCHIP_ACPI_DECLARE(gic_v3, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR,
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 282344b..3f1d9fd 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -102,6 +102,8 @@ static struct static_key supports_deactivate = STATIC_KEY_INIT_TRUE;
static struct gic_chip_data gic_data[CONFIG_ARM_GIC_MAX_NR] __read_mostly;
+static struct gic_kvm_info gic_v2_kvm_info;
+
#ifdef CONFIG_GIC_NON_BANKED
static void __iomem *gic_get_percpu_base(union gic_base *base)
{
@@ -1189,6 +1191,29 @@ static bool gic_check_eoimode(struct device_node *node, void __iomem **base)
return true;
}
+static void __init gic_of_setup_kvm_info(struct device_node *node)
+{
+ int ret;
+ struct resource *vctrl_res = &gic_v2_kvm_info.vctrl;
+ struct resource *vcpu_res = &gic_v2_kvm_info.vcpu;
+
+ gic_v2_kvm_info.type = GIC_V2;
+
+ gic_v2_kvm_info.maint_irq = irq_of_parse_and_map(node, 0);
+ if (!gic_v2_kvm_info.maint_irq)
+ return;
+
+ ret = of_address_to_resource(node, 2, vctrl_res);
+ if (ret)
+ return;
+
+ ret = of_address_to_resource(node, 3, vcpu_res);
+ if (ret)
+ return;
+
+ gic_set_kvm_info(&gic_v2_kvm_info);
+}
+
int __init
gic_of_init(struct device_node *node, struct device_node *parent)
{
@@ -1218,8 +1243,10 @@ gic_of_init(struct device_node *node, struct device_node *parent)
__gic_init_bases(gic_cnt, -1, dist_base, cpu_base, percpu_offset,
&node->fwnode);
- if (!gic_cnt)
+ if (!gic_cnt) {
gic_init_physaddr(node);
+ gic_of_setup_kvm_info(node);
+ }
if (parent) {
irq = irq_of_parse_and_map(node, 0);
@@ -1245,7 +1272,14 @@ IRQCHIP_DECLARE(pl390, "arm,pl390", gic_of_init);
#endif
#ifdef CONFIG_ACPI
-static phys_addr_t cpu_phy_base __initdata;
+static struct
+{
+ phys_addr_t cpu_phys_base;
+ u32 maint_irq;
+ int maint_irq_mode;
+ phys_addr_t vctrl_base;
+ phys_addr_t vcpu_base;
+} acpi_data __initdata;
static int __init
gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header,
@@ -1265,10 +1299,16 @@ gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header,
* All CPU interface addresses have to be the same.
*/
gic_cpu_base = processor->base_address;
- if (cpu_base_assigned && gic_cpu_base != cpu_phy_base)
+ if (cpu_base_assigned && gic_cpu_base != acpi_data.cpu_phys_base)
return -EINVAL;
- cpu_phy_base = gic_cpu_base;
+ acpi_data.cpu_phys_base = gic_cpu_base;
+ acpi_data.maint_irq = processor->vgic_interrupt;
+ acpi_data.maint_irq_mode = (processor->flags & ACPI_MADT_VGIC_IRQ_MODE) ?
+ ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE;
+ acpi_data.vctrl_base = processor->gich_base_address;
+ acpi_data.vcpu_base = processor->gicv_base_address;
+
cpu_base_assigned = 1;
return 0;
}
@@ -1299,6 +1339,41 @@ static bool __init gic_validate_dist(struct acpi_subtable_header *header,
#define ACPI_GICV2_DIST_MEM_SIZE (SZ_4K)
#define ACPI_GIC_CPU_IF_MEM_SIZE (SZ_8K)
+#define ACPI_GICV2_VCTRL_MEM_SIZE (SZ_4K)
+#define ACPI_GICV2_VCPU_MEM_SIZE (SZ_8K)
+
+static void __init gic_acpi_setup_kvm_info(void)
+{
+ int irq;
+ struct resource *vctrl_res = &gic_v2_kvm_info.vctrl;
+ struct resource *vcpu_res = &gic_v2_kvm_info.vcpu;
+
+ gic_v2_kvm_info.type = GIC_V2;
+
+ if (!acpi_data.vctrl_base)
+ return;
+
+ vctrl_res->flags = IORESOURCE_MEM;
+ vctrl_res->start = acpi_data.vctrl_base;
+ vctrl_res->end = vctrl_res->start + ACPI_GICV2_VCTRL_MEM_SIZE - 1;
+
+ if (!acpi_data.vcpu_base)
+ return;
+
+ vcpu_res->flags = IORESOURCE_MEM;
+ vcpu_res->start = acpi_data.vcpu_base;
+ vcpu_res->end = vcpu_res->start + ACPI_GICV2_VCPU_MEM_SIZE - 1;
+
+ irq = acpi_register_gsi(NULL, acpi_data.maint_irq,
+ acpi_data.maint_irq_mode,
+ ACPI_ACTIVE_HIGH);
+ if (irq <= 0)
+ return;
+
+ gic_v2_kvm_info.maint_irq = irq;
+
+ gic_set_kvm_info(&gic_v2_kvm_info);
+}
static int __init gic_v2_acpi_init(struct acpi_subtable_header *header,
const unsigned long end)
@@ -1316,7 +1391,7 @@ static int __init gic_v2_acpi_init(struct acpi_subtable_header *header,
return -EINVAL;
}
- cpu_base = ioremap(cpu_phy_base, ACPI_GIC_CPU_IF_MEM_SIZE);
+ cpu_base = ioremap(acpi_data.cpu_phys_base, ACPI_GIC_CPU_IF_MEM_SIZE);
if (!cpu_base) {
pr_err("Unable to map GICC registers\n");
return -ENOMEM;
@@ -1356,6 +1431,8 @@ static int __init gic_v2_acpi_init(struct acpi_subtable_header *header,
if (IS_ENABLED(CONFIG_ARM_GIC_V2M))
gicv2m_init(NULL, gic_data[0].domain);
+ gic_acpi_setup_kvm_info();
+
return 0;
}
IRQCHIP_ACPI_DECLARE(gic_v2, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR,
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index 25d0914..caedb74 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -49,11 +49,16 @@ enum arch_timer_reg {
#define ARCH_TIMER_EVT_STREAM_FREQ 10000 /* 100us */
+struct arch_timer_kvm_info {
+ struct timecounter timecounter;
+ int virtual_irq;
+};
+
#ifdef CONFIG_ARM_ARCH_TIMER
extern u32 arch_timer_get_rate(void);
extern u64 (*arch_timer_read_counter)(void);
-extern struct timecounter *arch_timer_get_timecounter(void);
+extern struct arch_timer_kvm_info *arch_timer_get_kvm_info(void);
#else
@@ -67,11 +72,6 @@ static inline u64 arch_timer_read_counter(void)
return 0;
}
-static inline struct timecounter *arch_timer_get_timecounter(void)
-{
- return NULL;
-}
-
#endif
#endif
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 281caf8..be6037a 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -25,6 +25,7 @@
#include <linux/spinlock.h>
#include <linux/types.h>
#include <kvm/iodev.h>
+#include <linux/irqchip/arm-gic-common.h>
#define VGIC_NR_IRQS_LEGACY 256
#define VGIC_NR_SGIS 16
@@ -353,15 +354,15 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus))
#define vgic_ready(k) ((k)->arch.vgic.ready)
-int vgic_v2_probe(struct device_node *vgic_node,
+int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info,
const struct vgic_ops **ops,
const struct vgic_params **params);
#ifdef CONFIG_KVM_ARM_VGIC_V3
-int vgic_v3_probe(struct device_node *vgic_node,
+int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info,
const struct vgic_ops **ops,
const struct vgic_params **params);
#else
-static inline int vgic_v3_probe(struct device_node *vgic_node,
+static inline int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info,
const struct vgic_ops **ops,
const struct vgic_params **params)
{
diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h
new file mode 100644
index 0000000..c647b05
--- /dev/null
+++ b/include/linux/irqchip/arm-gic-common.h
@@ -0,0 +1,34 @@
+/*
+ * include/linux/irqchip/arm-gic-common.h
+ *
+ * Copyright (C) 2016 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __LINUX_IRQCHIP_ARM_GIC_COMMON_H
+#define __LINUX_IRQCHIP_ARM_GIC_COMMON_H
+
+#include <linux/types.h>
+#include <linux/ioport.h>
+
+enum gic_type {
+ GIC_V2,
+ GIC_V3,
+};
+
+struct gic_kvm_info {
+ /* GIC type */
+ enum gic_type type;
+ /* Virtual CPU interface */
+ struct resource vcpu;
+ /* Maintenance interrupt number */
+ unsigned int maint_irq;
+ /* Virtual control interface */
+ struct resource vctrl;
+};
+
+const struct gic_kvm_info *gic_get_kvm_info(void);
+
+#endif /* __LINUX_IRQCHIP_ARM_GIC_COMMON_H */
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 9aaa35d..409db33 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -17,7 +17,6 @@
*/
#include <linux/cpu.h>
-#include <linux/of_irq.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
@@ -438,45 +437,29 @@ static struct notifier_block kvm_timer_cpu_nb = {
.notifier_call = kvm_timer_cpu_notify,
};
-static const struct of_device_id arch_timer_of_match[] = {
- { .compatible = "arm,armv7-timer", },
- { .compatible = "arm,armv8-timer", },
- {},
-};
-
int kvm_timer_hyp_init(void)
{
- struct device_node *np;
- unsigned int ppi;
+ struct arch_timer_kvm_info *info;
int err;
- timecounter = arch_timer_get_timecounter();
- if (!timecounter)
- return -ENODEV;
+ info = arch_timer_get_kvm_info();
+ timecounter = &info->timecounter;
- np = of_find_matching_node(NULL, arch_timer_of_match);
- if (!np) {
- kvm_err("kvm_arch_timer: can't find DT node\n");
+ if (info->virtual_irq <= 0) {
+ kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
+ info->virtual_irq);
return -ENODEV;
}
+ host_vtimer_irq = info->virtual_irq;
- ppi = irq_of_parse_and_map(np, 2);
- if (!ppi) {
- kvm_err("kvm_arch_timer: no virtual timer interrupt\n");
- err = -EINVAL;
- goto out;
- }
-
- err = request_percpu_irq(ppi, kvm_arch_timer_handler,
+ err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
"kvm guest timer", kvm_get_running_vcpus());
if (err) {
kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
- ppi, err);
+ host_vtimer_irq, err);
goto out;
}
- host_vtimer_irq = ppi;
-
err = __register_cpu_notifier(&kvm_timer_cpu_nb);
if (err) {
kvm_err("Cannot register timer CPU notifier\n");
@@ -489,14 +472,13 @@ int kvm_timer_hyp_init(void)
goto out_free;
}
- kvm_info("%s IRQ%d\n", np->name, ppi);
+ kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
on_each_cpu(kvm_timer_init_interrupt, NULL, 1);
goto out;
out_free:
- free_percpu_irq(ppi, kvm_get_running_vcpus());
+ free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
out:
- of_node_put(np);
return err;
}
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 67ec334..7e826c9 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -20,9 +20,6 @@
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
#include <linux/irqchip/arm-gic.h>
@@ -186,38 +183,39 @@ static void vgic_cpu_init_lrs(void *params)
}
/**
- * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
- * @node: pointer to the DT node
- * @ops: address of a pointer to the GICv2 operations
- * @params: address of a pointer to HW-specific parameters
+ * vgic_v2_probe - probe for a GICv2 compatible interrupt controller
+ * @gic_kvm_info: pointer to the GIC description
+ * @ops: address of a pointer to the GICv2 operations
+ * @params: address of a pointer to HW-specific parameters
*
* Returns 0 if a GICv2 has been found, with the low level operations
* in *ops and the HW parameters in *params. Returns an error code
* otherwise.
*/
-int vgic_v2_probe(struct device_node *vgic_node,
- const struct vgic_ops **ops,
- const struct vgic_params **params)
+int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info,
+ const struct vgic_ops **ops,
+ const struct vgic_params **params)
{
int ret;
- struct resource vctrl_res;
- struct resource vcpu_res;
struct vgic_params *vgic = &vgic_v2_params;
+ const struct resource *vctrl_res = &gic_kvm_info->vctrl;
+ const struct resource *vcpu_res = &gic_kvm_info->vcpu;
- vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
- if (!vgic->maint_irq) {
- kvm_err("error getting vgic maintenance irq from DT\n");
+ if (!gic_kvm_info->maint_irq) {
+ kvm_err("error getting vgic maintenance irq\n");
ret = -ENXIO;
goto out;
}
+ vgic->maint_irq = gic_kvm_info->maint_irq;
- ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
- if (ret) {
- kvm_err("Cannot obtain GICH resource\n");
+ if (!gic_kvm_info->vctrl.start) {
+ kvm_err("GICH not present in the firmware table\n");
+ ret = -ENXIO;
goto out;
}
- vgic->vctrl_base = of_iomap(vgic_node, 2);
+ vgic->vctrl_base = ioremap(gic_kvm_info->vctrl.start,
+ resource_size(&gic_kvm_info->vctrl));
if (!vgic->vctrl_base) {
kvm_err("Cannot ioremap GICH\n");
ret = -ENOMEM;
@@ -228,29 +226,23 @@ int vgic_v2_probe(struct device_node *vgic_node,
vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1;
ret = create_hyp_io_mappings(vgic->vctrl_base,
- vgic->vctrl_base + resource_size(&vctrl_res),
- vctrl_res.start);
+ vgic->vctrl_base + resource_size(vctrl_res),
+ vctrl_res->start);
if (ret) {
kvm_err("Cannot map VCTRL into hyp\n");
goto out_unmap;
}
- if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
- kvm_err("Cannot obtain GICV resource\n");
- ret = -ENXIO;
- goto out_unmap;
- }
-
- if (!PAGE_ALIGNED(vcpu_res.start)) {
+ if (!PAGE_ALIGNED(vcpu_res->start)) {
kvm_err("GICV physical address 0x%llx not page aligned\n",
- (unsigned long long)vcpu_res.start);
+ (unsigned long long)vcpu_res->start);
ret = -ENXIO;
goto out_unmap;
}
- if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+ if (!PAGE_ALIGNED(resource_size(vcpu_res))) {
kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
- (unsigned long long)resource_size(&vcpu_res),
+ (unsigned long long)resource_size(vcpu_res),
PAGE_SIZE);
ret = -ENXIO;
goto out_unmap;
@@ -259,10 +251,10 @@ int vgic_v2_probe(struct device_node *vgic_node,
vgic->can_emulate_gicv2 = true;
kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2);
- vgic->vcpu_base = vcpu_res.start;
+ vgic->vcpu_base = vcpu_res->start;
- kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
- vctrl_res.start, vgic->maint_irq);
+ kvm_info("GICH base=0x%llx, GICV base=0x%llx, IRQ=%d\n",
+ gic_kvm_info->vctrl.start, vgic->vcpu_base, vgic->maint_irq);
vgic->type = VGIC_V2;
vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS;
@@ -276,6 +268,5 @@ int vgic_v2_probe(struct device_node *vgic_node,
out_unmap:
iounmap(vgic->vctrl_base);
out:
- of_node_put(vgic_node);
return ret;
}
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 999bdc6..c02a1b1 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -20,11 +20,9 @@
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
#include <linux/irqchip/arm-gic-v3.h>
+#include <linux/irqchip/arm-gic-common.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_arm.h>
@@ -222,30 +220,24 @@ static void vgic_cpu_init_lrs(void *params)
}
/**
- * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
- * @node: pointer to the DT node
- * @ops: address of a pointer to the GICv3 operations
- * @params: address of a pointer to HW-specific parameters
+ * vgic_v3_probe - probe for a GICv3 compatible interrupt controller
+ * @gic_kvm_info: pointer to the GIC description
+ * @ops: address of a pointer to the GICv3 operations
+ * @params: address of a pointer to HW-specific parameters
*
* Returns 0 if a GICv3 has been found, with the low level operations
* in *ops and the HW parameters in *params. Returns an error code
* otherwise.
*/
-int vgic_v3_probe(struct device_node *vgic_node,
+int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info,
const struct vgic_ops **ops,
const struct vgic_params **params)
{
int ret = 0;
- u32 gicv_idx;
- struct resource vcpu_res;
struct vgic_params *vgic = &vgic_v3_params;
+ const struct resource *vcpu_res = &gic_kvm_info->vcpu;
- vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
- if (!vgic->maint_irq) {
- kvm_err("error getting vgic maintenance irq from DT\n");
- ret = -ENXIO;
- goto out;
- }
+ vgic->maint_irq = gic_kvm_info->maint_irq;
ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
@@ -256,24 +248,19 @@ int vgic_v3_probe(struct device_node *vgic_node,
vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
vgic->can_emulate_gicv2 = false;
- if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx))
- gicv_idx = 1;
-
- gicv_idx += 3; /* Also skip GICD, GICC, GICH */
- if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) {
+ if (!vcpu_res->start) {
kvm_info("GICv3: no GICV resource entry\n");
vgic->vcpu_base = 0;
- } else if (!PAGE_ALIGNED(vcpu_res.start)) {
+ } else if (!PAGE_ALIGNED(vcpu_res->start)) {
pr_warn("GICV physical address 0x%llx not page aligned\n",
- (unsigned long long)vcpu_res.start);
+ (unsigned long long)vcpu_res->start);
vgic->vcpu_base = 0;
- } else if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+ } else if (!PAGE_ALIGNED(resource_size(vcpu_res))) {
pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
- (unsigned long long)resource_size(&vcpu_res),
+ (unsigned long long)resource_size(vcpu_res),
PAGE_SIZE);
- vgic->vcpu_base = 0;
} else {
- vgic->vcpu_base = vcpu_res.start;
+ vgic->vcpu_base = vcpu_res->start;
vgic->can_emulate_gicv2 = true;
kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
KVM_DEV_TYPE_ARM_VGIC_V2);
@@ -286,15 +273,13 @@ int vgic_v3_probe(struct device_node *vgic_node,
vgic->type = VGIC_V3;
vgic->max_gic_vcpus = VGIC_V3_MAX_CPUS;
- kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
- vcpu_res.start, vgic->maint_irq);
+ kvm_info("GICV base=0x%llx, IRQ=%d\n",
+ vgic->vcpu_base, vgic->maint_irq);
on_each_cpu(vgic_cpu_init_lrs, vgic, 1);
*ops = &vgic_v3_ops;
*params = vgic;
-out:
- of_node_put(vgic_node);
return ret;
}
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 00429b3..60668a7 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -21,9 +21,7 @@
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
+#include <linux/irq.h>
#include <linux/rculist.h>
#include <linux/uaccess.h>
@@ -33,6 +31,7 @@
#include <trace/events/kvm.h>
#include <asm/kvm.h>
#include <kvm/iodev.h>
+#include <linux/irqchip/arm-gic-common.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -2389,33 +2388,38 @@ static struct notifier_block vgic_cpu_nb = {
.notifier_call = vgic_cpu_notify,
};
-static const struct of_device_id vgic_ids[] = {
- { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
- { .compatible = "arm,cortex-a7-gic", .data = vgic_v2_probe, },
- { .compatible = "arm,gic-400", .data = vgic_v2_probe, },
- { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
- {},
-};
-
-int kvm_vgic_hyp_init(void)
+static int kvm_vgic_probe(void)
{
- const struct of_device_id *matched_id;
- const int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
- const struct vgic_params **);
- struct device_node *vgic_node;
+ const struct gic_kvm_info *gic_kvm_info;
int ret;
- vgic_node = of_find_matching_node_and_match(NULL,
- vgic_ids, &matched_id);
- if (!vgic_node) {
- kvm_err("error: no compatible GIC node found\n");
+ gic_kvm_info = gic_get_kvm_info();
+ if (!gic_kvm_info)
return -ENODEV;
+
+ switch (gic_kvm_info->type) {
+ case GIC_V2:
+ ret = vgic_v2_probe(gic_kvm_info, &vgic_ops, &vgic);
+ break;
+ case GIC_V3:
+ ret = vgic_v3_probe(gic_kvm_info, &vgic_ops, &vgic);
+ break;
+ default:
+ ret = -ENODEV;
}
- vgic_probe = matched_id->data;
- ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
- if (ret)
+ return ret;
+}
+
+int kvm_vgic_hyp_init(void)
+{
+ int ret;
+
+ ret = kvm_vgic_probe();
+ if (ret) {
+ kvm_err("error: KVM vGIC probing failed\n");
return ret;
+ }
ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
"vgic", kvm_get_running_vcpus());