From a2d771c036eb8c040683089ca04c36dfb93a0e60 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 29 Oct 2010 16:56:19 +0100 Subject: xen: correct size of level2_kernel_pgt sizeof(pmd_t *) is 4 bytes on 32-bit PAE leading to an allocation of only 2048 bytes. The correct size is sizeof(pmd_t) giving us a full page allocation. Signed-off-by: Ian Campbell Cc: Jeremy Fitzhardinge Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index c237b81..21ed8d7 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2126,7 +2126,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, { pmd_t *kernel_pmd; - level2_kernel_pgt = extend_brk(sizeof(pmd_t *) * PTRS_PER_PMD, PAGE_SIZE); + level2_kernel_pgt = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + xen_start_info->nr_pt_frames * PAGE_SIZE + -- cgit v0.10.2 From 6903591f314b8947d0e362bda7715e90eb9df75e Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 1 Nov 2010 16:30:09 +0000 Subject: xen: events: do not unmask event channels on resume The IRQ core code will take care of disabling and reenabling interrupts over suspend/resume automatically, therefore we do not need to do this in the Xen event channel code. The only exception is those event channels marked IRQF_NO_SUSPEND which the IRQ core ignores. We must unmask these ourselves, taking care to obey the current IRQ_DISABLED status. Failure to check for IRQ_DISABLED leads to enabling polled-only event channels, such as that associated with the pv spinlocks, which must never be enabled: [ 21.970432] ------------[ cut here ]------------ [ 21.970432] kernel BUG at arch/x86/xen/spinlock.c:343! 
[ 21.970432] invalid opcode: 0000 [#1] SMP [ 21.970432] last sysfs file: /sys/devices/virtual/net/lo/operstate [ 21.970432] Modules linked in: [ 21.970432] [ 21.970432] Pid: 0, comm: swapper Not tainted (2.6.32.24-x86_32p-xen-01034-g787c727 #34) [ 21.970432] EIP: 0061:[] EFLAGS: 00010046 CPU: 3 [ 21.970432] EIP is at dummy_handler+0x3/0x7 [ 21.970432] EAX: 0000021c EBX: dfc16880 ECX: 0000001a EDX: 00000000 [ 21.970432] ESI: dfc02c00 EDI: 00000001 EBP: dfc47e10 ESP: dfc47e10 [ 21.970432] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0069 [ 21.970432] Process swapper (pid: 0, ti=dfc46000 task=dfc39440 task.ti=dfc46000) [ 21.970432] Stack: [ 21.970432] dfc47e30 c10a39f0 0000021c 00000000 00000000 dfc16880 0000021c 00000001 [ 21.970432] <0> dfc47e40 c10a4f08 0000021c 00000000 dfc47e78 c12240a7 c1839284 c1839284 [ 21.970432] <0> 00000200 00000000 00000000 f5720000 c1f3d028 c1f3d02c 00000180 dfc47e90 [ 21.970432] Call Trace: [ 21.970432] [] ? handle_IRQ_event+0x5f/0x122 [ 21.970432] [] ? handle_percpu_irq+0x2f/0x55 [ 21.970432] [] ? __xen_evtchn_do_upcall+0xdb/0x15f [ 21.970432] [] ? xen_evtchn_do_upcall+0x20/0x30 [ 21.970432] [] ? xen_do_upcall+0x7/0xc [ 21.970432] [] ? apic_reg_read+0xd3/0x22d [ 21.970432] [] ? hypercall_page+0x227/0x1005 [ 21.970432] [] ? xen_force_evtchn_callback+0xf/0x14 [ 21.970432] [] ? check_events+0x8/0xc [ 21.970432] [] ? xen_irq_enable_direct_end+0x0/0x1 [ 21.970432] [] ? finish_task_switch+0x62/0xba [ 21.970432] [] ? schedule+0x808/0x89d [ 21.970432] [] ? hrtimer_start_expires+0x1a/0x22 [ 21.970432] [] ? tick_nohz_restart_sched_tick+0x15a/0x162 [ 21.970432] [] ? cpu_idle+0x6d/0x6f [ 21.970432] [] ? 
cpu_bringup_and_idle+0xd/0xf [ 21.970432] Code: 5d 0f 95 c0 0f b6 c0 c3 55 66 83 78 02 00 89 e5 5d 0f 95 \ c0 0f b6 c0 c3 55 b2 01 86 10 31 c0 84 d2 89 e5 0f 94 c0 5d c3 55 89 e5 <0f> 0b \ eb fe 55 80 3d 4c ce 84 c1 00 89 e5 57 56 89 c6 53 74 15 [ 21.970432] EIP: [] dummy_handler+0x3/0x7 SS:ESP 0069:dfc47e10 [ 21.970432] ---[ end trace c0b71f7e12cf3011 ]--- Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 97612f5..321a0c8 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -1299,9 +1299,6 @@ static void restore_cpu_virqs(unsigned int cpu) evtchn_to_irq[evtchn] = irq; irq_info[irq] = mk_virq_info(evtchn, virq); bind_evtchn_to_cpu(evtchn, cpu); - - /* Ready for use. */ - unmask_evtchn(evtchn); } } @@ -1327,10 +1324,6 @@ static void restore_cpu_ipis(unsigned int cpu) evtchn_to_irq[evtchn] = irq; irq_info[irq] = mk_ipi_info(evtchn, ipi); bind_evtchn_to_cpu(evtchn, cpu); - - /* Ready for use. */ - unmask_evtchn(evtchn); - } } @@ -1390,6 +1383,7 @@ void xen_poll_irq(int irq) void xen_irq_resume(void) { unsigned int cpu, irq, evtchn; + struct irq_desc *desc; init_evtchn_cpu_bindings(); @@ -1408,6 +1402,23 @@ void xen_irq_resume(void) restore_cpu_virqs(cpu); restore_cpu_ipis(cpu); } + + /* + * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These + * are not handled by the IRQ core. 
+ */ + for_each_irq_desc(irq, desc) { + if (!desc->action || !(desc->action->flags & IRQF_NO_SUSPEND)) + continue; + if (desc->status & IRQ_DISABLED) + continue; + + evtchn = evtchn_from_irq(irq); + if (evtchn == -1) + continue; + + unmask_evtchn(evtchn); + } } static struct irq_chip xen_dynamic_chip __read_mostly = { -- cgit v0.10.2 From 9ec23a7f6d2537faf14368e066e307c06812c4ca Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 28 Oct 2010 11:32:29 -0700 Subject: xen: do not release any memory under 1M in domain 0 We already deliberately set up a 1-1 P2M for the region up to 1M in order to allow code which assumes this region is already mapped to work without having to convert everything to ioremap. Domain 0 should not return any apparently unused memory regions (reserved or otherwise) in this region to Xen since the e820 may not accurately reflect what the BIOS has stashed in this region. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index b1dbdaa..769c4b0 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -118,16 +118,18 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn, const struct e820map *e820) { phys_addr_t max_addr = PFN_PHYS(max_pfn); - phys_addr_t last_end = 0; + phys_addr_t last_end = ISA_END_ADDRESS; unsigned long released = 0; int i; + /* Free any unused memory above the low 1Mbyte. 
*/ for (i = 0; i < e820->nr_map && last_end < max_addr; i++) { phys_addr_t end = e820->map[i].addr; end = min(max_addr, end); - released += xen_release_chunk(last_end, end); - last_end = e820->map[i].addr + e820->map[i].size; + if (last_end < end) + released += xen_release_chunk(last_end, end); + last_end = max(last_end, e820->map[i].addr + e820->map[i].size); } if (last_end < max_addr) @@ -164,6 +166,7 @@ char * __init xen_memory_setup(void) XENMEM_memory_map; rc = HYPERVISOR_memory_op(op, &memmap); if (rc == -ENOSYS) { + BUG_ON(xen_initial_domain()); memmap.nr_entries = 1; map[0].addr = 0ULL; map[0].size = mem_end; @@ -201,12 +204,13 @@ char * __init xen_memory_setup(void) } /* - * Even though this is normal, usable memory under Xen, reserve - * ISA memory anyway because too many things think they can poke + * In domU, the ISA region is normal, usable memory, but we + * reserve ISA memory anyway because too many things poke * about in there. * - * In a dom0 kernel, this region is identity mapped with the - * hardware ISA area, so it really is out of bounds. + * In Dom0, the host E820 information can leave gaps in the + * ISA range, which would cause us to release those pages. To + * avoid this, we unconditionally reserve them here. */ e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, E820_RESERVED); -- cgit v0.10.2