summaryrefslogtreecommitdiff
path: root/arch/x86/xen
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--arch/x86/xen/enlighten.c21
-rw-r--r--arch/x86/xen/mmu.c86
-rw-r--r--arch/x86/xen/setup.c30
3 files changed, 102 insertions, 35 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 235c0f4..02c710b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -75,6 +75,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
enum xen_domain_type xen_domain_type = XEN_NATIVE;
EXPORT_SYMBOL_GPL(xen_domain_type);
+unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
+EXPORT_SYMBOL(machine_to_phys_mapping);
+unsigned int machine_to_phys_order;
+EXPORT_SYMBOL(machine_to_phys_order);
+
struct start_info *xen_start_info;
EXPORT_SYMBOL_GPL(xen_start_info);
@@ -1090,6 +1095,8 @@ static void __init xen_setup_stackprotector(void)
/* First C function to be called on Xen boot */
asmlinkage void __init xen_start_kernel(void)
{
+ struct physdev_set_iopl set_iopl;
+ int rc;
pgd_t *pgd;
if (!xen_start_info)
@@ -1097,6 +1104,8 @@ asmlinkage void __init xen_start_kernel(void)
xen_domain_type = XEN_PV_DOMAIN;
+ xen_setup_machphys_mapping();
+
/* Install Xen paravirt ops */
pv_info = xen_info;
pv_init_ops = xen_init_ops;
@@ -1191,8 +1200,6 @@ asmlinkage void __init xen_start_kernel(void)
/* Allocate and initialize top and mid mfn levels for p2m structure */
xen_build_mfn_list_list();
- init_mm.pgd = pgd;
-
/* keep using Xen gdt for now; no urgent need to change it */
#ifdef CONFIG_X86_32
@@ -1202,10 +1209,18 @@ asmlinkage void __init xen_start_kernel(void)
#else
pv_info.kernel_rpl = 0;
#endif
-
/* set the limit of our address space */
xen_reserve_top();
+ /* We used to do this in xen_arch_setup, but that is too late on AMD
+ * were early_cpu_init (run before ->arch_setup()) calls early_amd_init
+ * which pokes 0xcf8 port.
+ */
+ set_iopl.iopl = 1;
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
+ if (rc != 0)
+ xen_raw_printk("physdev_op failed %d\n", rc);
+
#ifdef CONFIG_X86_32
/* set up basic CPUID stuff */
cpu_detect(&new_cpu_data);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index c237b81..a1feff9 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2034,6 +2034,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
set_page_prot(pmd, PAGE_KERNEL_RO);
}
+void __init xen_setup_machphys_mapping(void)
+{
+ struct xen_machphys_mapping mapping;
+ unsigned long machine_to_phys_nr_ents;
+
+ if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
+ machine_to_phys_mapping = (unsigned long *)mapping.v_start;
+ machine_to_phys_nr_ents = mapping.max_mfn + 1;
+ } else {
+ machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
+ }
+ machine_to_phys_order = fls(machine_to_phys_nr_ents - 1);
+}
+
#ifdef CONFIG_X86_64
static void convert_pfn_mfn(void *v)
{
@@ -2119,44 +2133,83 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
return pgd;
}
#else /* !CONFIG_X86_64 */
-static RESERVE_BRK_ARRAY(pmd_t, level2_kernel_pgt, PTRS_PER_PMD);
+static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
+static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
+
+static __init void xen_write_cr3_init(unsigned long cr3)
+{
+ unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
+
+ BUG_ON(read_cr3() != __pa(initial_page_table));
+ BUG_ON(cr3 != __pa(swapper_pg_dir));
+
+ /*
+ * We are switching to swapper_pg_dir for the first time (from
+ * initial_page_table) and therefore need to mark that page
+ * read-only and then pin it.
+ *
+ * Xen disallows sharing of kernel PMDs for PAE
+ * guests. Therefore we must copy the kernel PMD from
+ * initial_page_table into a new kernel PMD to be used in
+ * swapper_pg_dir.
+ */
+ swapper_kernel_pmd =
+ extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
+ memcpy(swapper_kernel_pmd, initial_kernel_pmd,
+ sizeof(pmd_t) * PTRS_PER_PMD);
+ swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
+ __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
+ set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
+
+ set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
+ xen_write_cr3(cr3);
+ pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn);
+
+ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
+ PFN_DOWN(__pa(initial_page_table)));
+ set_page_prot(initial_page_table, PAGE_KERNEL);
+ set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
+
+ pv_mmu_ops.write_cr3 = &xen_write_cr3;
+}
__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
unsigned long max_pfn)
{
pmd_t *kernel_pmd;
- level2_kernel_pgt = extend_brk(sizeof(pmd_t *) * PTRS_PER_PMD, PAGE_SIZE);
+ initial_kernel_pmd =
+ extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
xen_start_info->nr_pt_frames * PAGE_SIZE +
512*1024);
kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
- memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
+ memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
- xen_map_identity_early(level2_kernel_pgt, max_pfn);
+ xen_map_identity_early(initial_kernel_pmd, max_pfn);
- memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
- set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
- __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
+ memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
+ initial_page_table[KERNEL_PGD_BOUNDARY] =
+ __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
- set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
- set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
+ set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
+ set_page_prot(initial_page_table, PAGE_KERNEL_RO);
set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
- xen_write_cr3(__pa(swapper_pg_dir));
-
- pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
+ pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
+ PFN_DOWN(__pa(initial_page_table)));
+ xen_write_cr3(__pa(initial_page_table));
memblock_x86_reserve_range(__pa(xen_start_info->pt_base),
__pa(xen_start_info->pt_base +
xen_start_info->nr_pt_frames * PAGE_SIZE),
"XEN PAGETABLES");
- return swapper_pg_dir;
+ return initial_page_table;
}
#endif /* CONFIG_X86_64 */
@@ -2290,7 +2343,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
.write_cr2 = xen_write_cr2,
.read_cr3 = xen_read_cr3,
+#ifdef CONFIG_X86_32
+ .write_cr3 = xen_write_cr3_init,
+#else
.write_cr3 = xen_write_cr3,
+#endif
.flush_tlb_user = xen_flush_tlb,
.flush_tlb_kernel = xen_flush_tlb,
@@ -2627,7 +2684,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
- vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+ BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
+ (VM_PFNMAP | VM_RESERVED | VM_IO)));
rmd.mfn = mfn;
rmd.prot = prot;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index b1dbdaa..01afd8a 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -23,7 +23,6 @@
#include <xen/interface/callback.h>
#include <xen/interface/memory.h>
#include <xen/interface/physdev.h>
-#include <xen/interface/memory.h>
#include <xen/features.h>
#include "xen-ops.h"
@@ -118,16 +117,18 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
const struct e820map *e820)
{
phys_addr_t max_addr = PFN_PHYS(max_pfn);
- phys_addr_t last_end = 0;
+ phys_addr_t last_end = ISA_END_ADDRESS;
unsigned long released = 0;
int i;
+ /* Free any unused memory above the low 1Mbyte. */
for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
phys_addr_t end = e820->map[i].addr;
end = min(max_addr, end);
- released += xen_release_chunk(last_end, end);
- last_end = e820->map[i].addr + e820->map[i].size;
+ if (last_end < end)
+ released += xen_release_chunk(last_end, end);
+ last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
}
if (last_end < max_addr)
@@ -164,6 +165,7 @@ char * __init xen_memory_setup(void)
XENMEM_memory_map;
rc = HYPERVISOR_memory_op(op, &memmap);
if (rc == -ENOSYS) {
+ BUG_ON(xen_initial_domain());
memmap.nr_entries = 1;
map[0].addr = 0ULL;
map[0].size = mem_end;
@@ -201,12 +203,13 @@ char * __init xen_memory_setup(void)
}
/*
- * Even though this is normal, usable memory under Xen, reserve
- * ISA memory anyway because too many things think they can poke
+ * In domU, the ISA region is normal, usable memory, but we
+ * reserve ISA memory anyway because too many things poke
* about in there.
*
- * In a dom0 kernel, this region is identity mapped with the
- * hardware ISA area, so it really is out of bounds.
+ * In Dom0, the host E820 information can leave gaps in the
+ * ISA range, which would cause us to release those pages. To
+ * avoid this, we unconditionally reserve them here.
*/
e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
E820_RESERVED);
@@ -244,8 +247,7 @@ char * __init xen_memory_setup(void)
else
extra_pages = 0;
- if (!xen_initial_domain())
- xen_add_extra_mem(extra_pages);
+ xen_add_extra_mem(extra_pages);
return "Xen";
}
@@ -333,9 +335,6 @@ void __cpuinit xen_enable_syscall(void)
void __init xen_arch_setup(void)
{
- struct physdev_set_iopl set_iopl;
- int rc;
-
xen_panic_handler_init();
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
@@ -352,11 +351,6 @@ void __init xen_arch_setup(void)
xen_enable_sysenter();
xen_enable_syscall();
- set_iopl.iopl = 1;
- rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
- if (rc != 0)
- printk(KERN_INFO "physdev_op failed %d\n", rc);
-
#ifdef CONFIG_ACPI
if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
printk(KERN_INFO "ACPI in unprivileged domain disabled\n");