From 2fc136eecd0c647a6b13fcd00d0c41a1a28f35a5 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 12 Sep 2012 12:44:30 +0100 Subject: xen/m2p: do not reuse kmap_op->dev_bus_addr If the caller passes a valid kmap_op to m2p_add_override, we use kmap_op->dev_bus_addr to store the original mfn, but dev_bus_addr is part of the interface with Xen and if we are batching the hypercalls it might not have been written by the hypervisor yet. That means that later on Xen will write to it and we'll think that the original mfn is actually what Xen has written to it. Rather than "stealing" struct members from kmap_op, keep using page->index to store the original mfn and add another parameter to m2p_remove_override to get the corresponding kmap_op instead. It is now responsibility of the caller to keep track of which kmap_op corresponds to a particular page in the m2p_override (gntdev, the only user of this interface that passes a valid kmap_op, is already doing that). CC: stable@kernel.org Reported-and-Tested-By: Sander Eikelenboom Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 93971e8..472b9b7 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -51,7 +51,8 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s, extern int m2p_add_override(unsigned long mfn, struct page *page, struct gnttab_map_grant_ref *kmap_op); -extern int m2p_remove_override(struct page *page, bool clear_pte); +extern int m2p_remove_override(struct page *page, + struct gnttab_map_grant_ref *kmap_op); extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 76ba0e9..72213da 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -828,9 +828,6 @@ int m2p_add_override(unsigned long mfn, struct page *page, xen_mc_issue(PARAVIRT_LAZY_MMU); } - /* let's use dev_bus_addr to record the old mfn instead */ - kmap_op->dev_bus_addr = page->index; - page->index = (unsigned long) kmap_op; } spin_lock_irqsave(&m2p_override_lock, flags); list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); @@ -857,7 +854,8 @@ int m2p_add_override(unsigned long mfn, struct page *page, return 0; } EXPORT_SYMBOL_GPL(m2p_add_override); -int m2p_remove_override(struct page *page, bool clear_pte) +int m2p_remove_override(struct page *page, + struct gnttab_map_grant_ref *kmap_op) { unsigned long flags; unsigned long mfn; @@ -887,10 +885,8 @@ int m2p_remove_override(struct page *page, bool clear_pte) WARN_ON(!PagePrivate(page)); ClearPagePrivate(page); - if (clear_pte) { - struct gnttab_map_grant_ref *map_op = - (struct gnttab_map_grant_ref *) page->index; - set_phys_to_machine(pfn, map_op->dev_bus_addr); + set_phys_to_machine(pfn, page->index); + if (kmap_op != NULL) { if (!PageHighMem(page)) { struct multicall_space mcs; struct gnttab_unmap_grant_ref *unmap_op; @@ -902,13 +898,13 @@ int m2p_remove_override(struct page *page, bool clear_pte) * issued. In this case handle is going to -1 because * it hasn't been modified yet. */ - if (map_op->handle == -1) + if (kmap_op->handle == -1) xen_mc_flush(); /* - * Now if map_op->handle is negative it means that the + * Now if kmap_op->handle is negative it means that the * hypercall actually returned an error. */ - if (map_op->handle == GNTST_general_error) { + if (kmap_op->handle == GNTST_general_error) { printk(KERN_WARNING "m2p_remove_override: " "pfn %lx mfn %lx, failed to modify kernel mappings", pfn, mfn); @@ -918,8 +914,8 @@ int m2p_remove_override(struct page *page, bool clear_pte) mcs = xen_mc_entry( sizeof(struct gnttab_unmap_grant_ref)); unmap_op = mcs.args; - unmap_op->host_addr = map_op->host_addr; - unmap_op->handle = map_op->handle; + unmap_op->host_addr = kmap_op->host_addr; + unmap_op->handle = kmap_op->handle; unmap_op->dev_bus_addr = 0; MULTI_grant_table_op(mcs.mc, @@ -930,10 +926,9 @@ int m2p_remove_override(struct page *page, bool clear_pte) set_pte_at(&init_mm, address, ptep, pfn_pte(pfn, PAGE_KERNEL)); __flush_tlb_single(address); - map_op->host_addr = 0; + kmap_op->host_addr = 0; } - } else - set_phys_to_machine(pfn, page->index); + } /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present * somewhere in this domain, even before being added to the diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 73f196c..c6decb9 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -337,7 +337,7 @@ static void xen_blkbk_unmap(struct pending_req *req) invcount++; } - ret = gnttab_unmap_refs(unmap, pages, invcount, false); + ret = gnttab_unmap_refs(unmap, NULL, pages, invcount); BUG_ON(ret); } diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 1ffd03b..7f12416 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -314,8 +314,9 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) } } - err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, - pages, true); + err = gnttab_unmap_refs(map->unmap_ops + offset, + use_ptemod ? map->kmap_ops + offset : NULL, map->pages + offset, + pages); if (err) return err; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 0bfc1ef..0067266 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -870,7 +870,8 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, EXPORT_SYMBOL_GPL(gnttab_map_refs); int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, - struct page **pages, unsigned int count, bool clear_pte) + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count) { int i, ret; bool lazy = false; @@ -888,7 +889,8 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, } for (i = 0; i < count; i++) { - ret = m2p_remove_override(pages[i], clear_pte); + ret = m2p_remove_override(pages[i], kmap_ops ? + &kmap_ops[i] : NULL); if (ret) return ret; } diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 11e27c3..f19fff8 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -187,6 +187,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count); int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, - struct page **pages, unsigned int count, bool clear_pte); + struct gnttab_map_grant_ref *kunmap_ops, + struct page **pages, unsigned int count); #endif /* __ASM_GNTTAB_H__ */ -- cgit v0.10.2 From bd49940a35ec7d488ae63bd625639893b3385b97 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 19 Sep 2012 08:30:55 -0400 Subject: xen/boot: Disable BIOS SMP MP table search. As the initial domain we are able to search/map certain regions of memory to harvest configuration data. For all low-level we use ACPI tables - for interrupts we use exclusively ACPI _PRT (so DSDT) and MADT for INT_SRC_OVR. The SMP MP table is not used at all. As a matter of fact we do not even support machines that only have SMP MP but no ACPI tables. Lets follow how Moorestown does it and just disable searching for BIOS SMP tables. This also fixes an issue on HP Proliant BL680c G5 and DL380 G6: 9f->100 for 1:1 PTE Freeing 9f-100 pfn range: 97 pages freed 1-1 mapping on 9f->100 .. snip.. e820: BIOS-provided physical RAM map: Xen: [mem 0x0000000000000000-0x000000000009efff] usable Xen: [mem 0x000000000009f400-0x00000000000fffff] reserved Xen: [mem 0x0000000000100000-0x00000000cfd1dfff] usable .. snip.. Scan for SMP in [mem 0x00000000-0x000003ff] Scan for SMP in [mem 0x0009fc00-0x0009ffff] Scan for SMP in [mem 0x000f0000-0x000fffff] found SMP MP-table at [mem 0x000f4fa0-0x000f4faf] mapped at [ffff8800000f4fa0] (XEN) mm.c:908:d0 Error getting mfn 100 (pfn 5555555555555555) from L1 entry 0000000000100461 for l1e_owner=0, pg_owner=0 (XEN) mm.c:4995:d0 ptwr_emulate: could not get_page_from_l1e() BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] xen_set_pte_init+0x66/0x71 . snip.. Pid: 0, comm: swapper Not tainted 3.6.0-rc6upstream-00188-gb6fb969-dirty #2 HP ProLiant BL680c G5 .. snip.. Call Trace: [] __early_ioremap+0x18a/0x248 [] ? printk+0x48/0x4a [] early_ioremap+0x13/0x15 [] get_mpc_size+0x2f/0x67 [] smp_scan_config+0x10c/0x136 [] default_find_smp_config+0x36/0x5a [] setup_arch+0x5b3/0xb5b [] ? printk+0x48/0x4a [] start_kernel+0x90/0x390 [] x86_64_start_reservations+0x131/0x136 [] xen_start_kernel+0x65f/0x661 (XEN) Domain 0 crashed: 'noreboot' set - not rebooting. which is that ioremap would end up mapping 0xff using _PAGE_IOMAP (which is what early_ioremap sticks as a flag) - which meant we would get MFN 0xFF (pte ff461, which is OK), and then it would also map 0x100 (b/c ioremap tries to get page aligned request, and it was trying to map 0xf4fa0 + PAGE_SIZE - so it mapped the next page) as _PAGE_IOMAP. Since 0x100 is actually a RAM page, and the _PAGE_IOMAP bypasses the P2M lookup we would happily set the PTE to 1000461. Xen would deny the request since we do not have access to the Machine Frame Number (MFN) of 0x100. The P2M[0x100] is for example 0x80140. CC: stable@vger.kernel.org Fixes-Oracle-Bugzilla: https://bugzilla.oracle.com/bugzilla/show_bug.cgi?id=13665 Acked-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 9642d4a..1fbe75a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1452,6 +1452,10 @@ asmlinkage void __init xen_start_kernel(void) pci_request_acs(); xen_acpi_sleep_register(); + + /* Avoid searching for BIOS MP tables */ + x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.get_smp_config = x86_init_uint_noop; } #ifdef CONFIG_PCI /* PCI BIOS service won't work from a PV guest. */ -- cgit v0.10.2