From db05fed0ad72f264e39bcb366795f7367384ec92 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 24 Nov 2009 16:41:47 -0800 Subject: xen/xenbus: make DEVICE_ATTR()s static They don't need to be global, and may cause linker clashes. Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index d42e25d..3800da7 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -454,21 +454,21 @@ static ssize_t xendev_show_nodename(struct device *dev, { return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); } -DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL); +static DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL); static ssize_t xendev_show_devtype(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); } -DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); +static DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); static ssize_t xendev_show_modalias(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype); } -DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL); +static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL); int xenbus_probe_node(struct xen_bus_type *bus, const char *type, -- cgit v0.10.2 From c6e1971139be1342902873181f3b80a979bfb33b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 8 Jul 2009 12:27:37 +0200 Subject: xen: fix is_disconnected_device/exists_disconnected_device The logic of is_disconnected_device/exists_disconnected_device is wrong in that they are used to test whether a device is trying to connect (i.e. connecting). For this reason the patch fixes them to not consider a Closing or Closed device to be connecting. At the same time the patch also renames the functions according to what they really do; you could say a closed device is "disconnected" (the old name), but not "connecting" (the new name). This patch is a backport of changeset 909 from the Xenbits tree. Cc: Jeremy Fitzhardinge Signed-off-by: Paolo Bonzini Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 3800da7..19bce3e 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -843,7 +843,7 @@ postcore_initcall(xenbus_probe_init); MODULE_LICENSE("GPL"); -static int is_disconnected_device(struct device *dev, void *data) +static int is_device_connecting(struct device *dev, void *data) { struct xenbus_device *xendev = to_xenbus_device(dev); struct device_driver *drv = data; @@ -861,14 +861,15 @@ static int is_disconnected_device(struct device *dev, void *data) return 0; xendrv = to_xenbus_driver(dev->driver); - return (xendev->state != XenbusStateConnected || - (xendrv->is_ready && !xendrv->is_ready(xendev))); + return (xendev->state < XenbusStateConnected || + (xendev->state == XenbusStateConnected && + xendrv->is_ready && !xendrv->is_ready(xendev))); } -static int exists_disconnected_device(struct device_driver *drv) +static int exists_connecting_device(struct device_driver *drv) { return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, - is_disconnected_device); + is_device_connecting); } static int print_device_status(struct device *dev, void *data) @@ -918,7 +919,7 @@ static void wait_for_devices(struct xenbus_driver *xendrv) if (!ready_to_wait_for_devices || !xen_domain()) return; - while (exists_disconnected_device(drv)) { + while (exists_connecting_device(drv)) { if (time_after(jiffies, timeout)) break; schedule_timeout_interruptible(HZ/10); -- cgit v0.10.2 From f8dc33088febc63286b7a60e6b678de8e064de8e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 8 Jul 2009 12:27:38 +0200 Subject: xen: improvement to wait_for_devices() When printing a warning about a timed-out device, print the current state of both ends of the device connection (i.e., backend as well as frontend). This backports half of changeset 146 from the Xenbits tree. Cc: Jeremy Fitzhardinge Signed-off-by: Paolo Bonzini Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 19bce3e..9246a8c 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -885,10 +885,13 @@ static int print_device_status(struct device *dev, void *data) /* Information only: is this too noisy? */ printk(KERN_INFO "XENBUS: Device with no driver: %s\n", xendev->nodename); - } else if (xendev->state != XenbusStateConnected) { + } else if (xendev->state < XenbusStateConnected) { + enum xenbus_state rstate = XenbusStateUnknown; + if (xendev->otherend) + rstate = xenbus_read_driver_state(xendev->otherend); printk(KERN_WARNING "XENBUS: Timeout connecting " - "to device: %s (state %d)\n", - xendev->nodename, xendev->state); + "to device: %s (local state %d, remote state %d)\n", + xendev->nodename, xendev->state, rstate); } return 0; -- cgit v0.10.2 From ae7888012969355a548372e99b066d9e31153b62 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 8 Jul 2009 12:27:39 +0200 Subject: xen: wait up to 5 minutes for device connetion Increases the device timeout from 10s to 5 minutes, giving the user a visual indication during that time in case there are problems. The patch is a backport of changesets 144 and 150 in the Xenbits tree. Cc: Jeremy Fitzhardinge Signed-off-by: Paolo Bonzini Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 9246a8c..649fcdf 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -901,7 +901,7 @@ static int print_device_status(struct device *dev, void *data) static int ready_to_wait_for_devices; /* - * On a 10 second timeout, wait for all devices currently configured. We need + * On a 5-minute timeout, wait for all devices currently configured. We need * to do this to guarantee that the filesystems and / or network devices * needed for boot are available, before we can allow the boot to proceed. * @@ -916,18 +916,30 @@ static int ready_to_wait_for_devices; */ static void wait_for_devices(struct xenbus_driver *xendrv) { - unsigned long timeout = jiffies + 10*HZ; + unsigned long start = jiffies; struct device_driver *drv = xendrv ? &xendrv->driver : NULL; + unsigned int seconds_waited = 0; if (!ready_to_wait_for_devices || !xen_domain()) return; while (exists_connecting_device(drv)) { - if (time_after(jiffies, timeout)) - break; + if (time_after(jiffies, start + (seconds_waited+5)*HZ)) { + if (!seconds_waited) + printk(KERN_WARNING "XENBUS: Waiting for " + "devices to initialise: "); + seconds_waited += 5; + printk("%us...", 300 - seconds_waited); + if (seconds_waited == 300) + break; + } + schedule_timeout_interruptible(HZ/10); } + if (seconds_waited) + printk("\n"); + bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, print_device_status); } -- cgit v0.10.2 From be012920ecba161ad20303a3f6d9e96c58cf97c7 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Sat, 21 Nov 2009 08:35:55 +0800 Subject: xen: re-register runstate area earlier on resume. This is necessary to ensure the runstate area is available to xen_sched_clock before any calls to printk which will require it in order to provide a timestamp. I chose to pull the xen_setup_runstate_info out of xen_time_init into the caller in order to maintain parity with calling xen_setup_runstate_info separately from calling xen_time_resume. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index dfbf70e..cb61f77 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -148,6 +148,8 @@ void xen_vcpu_restore(void) HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) BUG(); + xen_setup_runstate_info(cpu); + xen_vcpu_setup(cpu); if (other_cpu && diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 0a5aa44..6bbff94 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -100,7 +100,7 @@ bool xen_vcpu_stolen(int vcpu) return per_cpu(runstate, vcpu).state == RUNSTATE_runnable; } -static void setup_runstate_info(int cpu) +void xen_setup_runstate_info(int cpu) { struct vcpu_register_runstate_memory_area area; @@ -442,8 +442,6 @@ void xen_setup_timer(int cpu) evt->cpumask = cpumask_of(cpu); evt->irq = irq; - - setup_runstate_info(cpu); } void xen_teardown_timer(int cpu) @@ -494,6 +492,7 @@ __init void xen_time_init(void) setup_force_cpu_cap(X86_FEATURE_TSC); + xen_setup_runstate_info(cpu); xen_setup_timer(cpu); xen_setup_cpu_clockevents(); } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 355fa6b..3252932 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -41,6 +41,7 @@ void __init xen_build_dynamic_phys_to_machine(void); void xen_init_irq_ops(void); void xen_setup_timer(int cpu); +void xen_setup_runstate_info(int cpu); void xen_teardown_timer(int cpu); cycle_t xen_clocksource_read(void); void xen_setup_cpu_clockevents(void); -- cgit v0.10.2 From 3905bb2aa7bb801b31946b37a4635ebac4009051 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 21 Nov 2009 08:46:29 +0800 Subject: xen: restore runstate_info even if !have_vcpu_info_placement Even if have_vcpu_info_placement is not set, we still need to set up the runstate area on each resumed vcpu. Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index cb61f77..a7b49f9 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -138,26 +138,23 @@ static void xen_vcpu_setup(int cpu) */ void xen_vcpu_restore(void) { - if (have_vcpu_info_placement) { - int cpu; + int cpu; - for_each_online_cpu(cpu) { - bool other_cpu = (cpu != smp_processor_id()); + for_each_online_cpu(cpu) { + bool other_cpu = (cpu != smp_processor_id()); - if (other_cpu && - HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) - BUG(); + if (other_cpu && + HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) + BUG(); - xen_setup_runstate_info(cpu); + xen_setup_runstate_info(cpu); + if (have_vcpu_info_placement) xen_vcpu_setup(cpu); - if (other_cpu && - HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) - BUG(); - } - - BUG_ON(!have_vcpu_info_placement); + if (other_cpu && + HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) + BUG(); } } -- cgit v0.10.2 From fa24ba62ea2869308ffc9f0b286ac9650b4ca6cb Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Sat, 21 Nov 2009 11:32:49 +0000 Subject: xen: correctly restore pfn_to_mfn_list_list after resume pvops kernels >= 2.6.30 can currently only be saved and restored once. The second attempt to save results in: ERROR Internal error: Frame# in pfn-to-mfn frame list is not in pseudophys ERROR Internal error: entry 0: p2m_frame_list[0] is 0xf2c2c2c2, max 0x120000 ERROR Internal error: Failed to map/save the p2m frame list I finally narrowed it down to: commit cdaead6b4e657f960d6d6f9f380e7dfeedc6a09b Author: Jeremy Fitzhardinge Date: Fri Feb 27 15:34:59 2009 -0800 xen: split construction of p2m mfn tables from registration Build the p2m_mfn_list_list early with the rest of the p2m table, but register it later when the real shared_info structure is in place. Signed-off-by: Jeremy Fitzhardinge The unforeseen side-effect of this change was to cause the mfn list list to not be rebuilt on resume. Prior to this change it would have been rebuilt via xen_post_suspend() -> xen_setup_shared_info() -> xen_setup_mfn_list_list(). Fix by explicitly calling xen_build_mfn_list_list() from xen_post_suspend(). Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3bf7b1d..bf4cd6b 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -185,7 +185,7 @@ static inline unsigned p2m_index(unsigned long pfn) } /* Build the parallel p2m_top_mfn structures */ -static void __init xen_build_mfn_list_list(void) +void xen_build_mfn_list_list(void) { unsigned pfn, idx; diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 95be7b4..6343a5d 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -27,6 +27,8 @@ void xen_pre_suspend(void) void xen_post_suspend(int suspend_cancelled) { + xen_build_mfn_list_list(); + xen_setup_shared_info(); if (suspend_cancelled) { diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 3252932..f9153a3 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -25,6 +25,7 @@ extern struct shared_info *HYPERVISOR_shared_info; void xen_setup_mfn_list_list(void); void xen_setup_shared_info(void); +void xen_build_mfn_list_list(void); void xen_setup_machphys_mapping(void); pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_ident_map_ISA(void); -- cgit v0.10.2 From f350c7922faad3397c98c81a9e5658f5a1ef0214 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 24 Nov 2009 10:16:23 +0000 Subject: xen: register timer interrupt with IRQF_TIMER Otherwise the timer is disabled by dpm_suspend_noirq() which in turn prevents correct operation of stop_machine on multi-processor systems and breaks suspend. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 6bbff94..9d1f853 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -434,7 +434,7 @@ void xen_setup_timer(int cpu) name = ""; irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, - IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, + IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER, name, NULL); evt = &per_cpu(xen_clock_events, cpu); -- cgit v0.10.2 From 028896721ac04f6fa0697f3ecac3f98761746363 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 24 Nov 2009 09:32:48 -0800 Subject: xen: register runstate on secondary CPUs The commit "xen: re-register runstate area earlier on resume" caused us to never try and setup the runstate area for secondary CPUs. Ensure that we do this... Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index fe03eee..360f8d8 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -295,6 +295,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) (unsigned long)task_stack_page(idle) - KERNEL_STACK_OFFSET + THREAD_SIZE; #endif + xen_setup_runstate_info(cpu); xen_setup_timer(cpu); xen_init_lock_cpu(cpu); -- cgit v0.10.2 From 499d19b82b586aef18727b9ae1437f8f37b66e91 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 24 Nov 2009 09:38:25 -0800 Subject: xen: register runstate info for boot CPU early printk timestamping uses sched_clock, which in turn relies on runstate info under Xen. So make sure we set it up before any printks can be called. Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a7b49f9..79f9738 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1181,6 +1181,8 @@ asmlinkage void __init xen_start_kernel(void) xen_raw_console_write("about to get started...\n"); + xen_setup_runstate_info(0); + /* Start the world */ #ifdef CONFIG_X86_32 i386_start_kernel(); -- cgit v0.10.2 From 922cc38ab71d1360978e65207e4a4f4988987127 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 24 Nov 2009 09:58:49 -0800 Subject: xen: don't call dpm_resume_noirq() with interrupts disabled. dpm_resume_noirq() takes a mutex, so it can't be called from a no-interrupt context. Don't call it from within the stop-machine function, but just afterwards, since we're resuming anyway, regardless of what happened. Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 10d03d7..7b69a1a 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -43,7 +43,6 @@ static int xen_suspend(void *data) if (err) { printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n", err); - dpm_resume_noirq(PMSG_RESUME); return err; } @@ -69,7 +68,6 @@ static int xen_suspend(void *data) } sysdev_resume(); - dpm_resume_noirq(PMSG_RESUME); return 0; } @@ -108,6 +106,9 @@ static void do_suspend(void) } err = stop_machine(xen_suspend, &cancelled, cpumask_of(0)); + + dpm_resume_noirq(PMSG_RESUME); + if (err) { printk(KERN_ERR "failed to start xen_suspend: %d\n", err); goto out; @@ -119,8 +120,6 @@ static void do_suspend(void) } else xs_suspend_cancel(); - dpm_resume_noirq(PMSG_RESUME); - resume_devices: dpm_resume_end(PMSG_RESUME); -- cgit v0.10.2 From 6aaf5d633bb6cead81b396d861d7bae4b9a0ba7e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 25 Nov 2009 13:15:38 -0800 Subject: xen: use iret for return from 64b kernel to 32b usermode If Xen wants to return to a 32b usermode with sysret it must use the right form. When using VCGF_in_syscall to trigger this, it looks at the code segment and does a 32b sysret if it is FLAT_USER_CS32. However, this is different from __USER32_CS, so it fails to return properly if we use the normal Linux segment. So avoid the whole mess by dropping VCGF_in_syscall and simply use plain iret to return to usermode. Signed-off-by: Jeremy Fitzhardinge Acked-by: Jan Beulich Cc: Stable Kernel diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 02f496a..53adefd 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -96,7 +96,7 @@ ENTRY(xen_sysret32) pushq $__USER32_CS pushq %rcx - pushq $VGCF_in_syscall + pushq $0 1: jmp hypercall_iret ENDPATCH(xen_sysret32) RELOC(xen_sysret32, 1b+1) @@ -151,7 +151,7 @@ ENTRY(xen_syscall32_target) ENTRY(xen_sysenter_target) lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax - pushq $VGCF_in_syscall + pushq $0 jmp hypercall_iret ENDPROC(xen_syscall32_target) ENDPROC(xen_sysenter_target) -- cgit v0.10.2 From f6eafe3665bcc374c66775d58312d1c06c55303f Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 25 Nov 2009 14:12:08 +0000 Subject: xen: call clock resume notifier on all CPUs tick_resume() is never called on secondary processors. Presumably this is because they are offlined for suspend on native and so this is normally taken care of in the CPU onlining path. Under Xen we keep all CPUs online over a suspend. This patch papers over the issue for me but I will investigate a more generic, less hacky, way of doing to the same. tick_suspend is also only called on the boot CPU which I presume should be fixed too. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel Cc: Thomas Gleixner diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 6343a5d..987267f 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -1,4 +1,5 @@ #include +#include #include #include @@ -46,7 +47,19 @@ void xen_post_suspend(int suspend_cancelled) } +static void xen_vcpu_notify_restore(void *data) +{ + unsigned long reason = (unsigned long)data; + + /* Boot processor notified via generic timekeeping_resume() */ + if ( smp_processor_id() == 0) + return; + + clockevents_notify(reason, NULL); +} + void xen_arch_resume(void) { - /* nothing */ + smp_call_function(xen_vcpu_notify_restore, + (void *)CLOCK_EVT_NOTIFY_RESUME, 1); } -- cgit v0.10.2 From fed5ea87e02aaf902ff38c65b4514233db03dc09 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 1 Dec 2009 16:15:30 +0000 Subject: xen: don't leak IRQs over suspend/resume. On resume irq_info[*].evtchn is reset to 0 since event channel mappings are not preserved over suspend/resume. The other contents of irq_info is preserved to allow rebind_evtchn_irq() to function. However when a device resumes it will try to unbind from the previous IRQ (e.g. blkfront goes blkfront_resume() -> blkif_free() -> unbind_from_irqhandler() -> unbind_from_irq()). This will fail due to the check for VALID_EVTCHN in unbind_from_irq() and the IRQ is leaked. The device will then continue to resume and allocate a new IRQ, eventually leading to find_unbound_irq() panic()ing. Fix this by changing unbind_from_irq() to handle teardown of interrupts which have type!=IRQT_UNBOUND but are not currently bound to a specific event channel. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 2f57276..ce602dd 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -474,6 +474,9 @@ static void unbind_from_irq(unsigned int irq) bind_evtchn_to_cpu(evtchn, 0); evtchn_to_irq[evtchn] = -1; + } + + if (irq_info[irq].type != IRQT_UNBOUND) { irq_info[irq] = mk_unbound_info(); dynamic_irq_cleanup(irq); -- cgit v0.10.2 From 65f63384b391bf4d384327d8a7c6de9860290b5c Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 1 Dec 2009 11:47:14 +0000 Subject: xen: improve error handling in do_suspend. The existing error handling has a few issues: - If freeze_processes() fails it exits with shutting_down = SHUTDOWN_SUSPEND. - If dpm_suspend_noirq() fails it exits without resuming xenbus. - If stop_machine() fails it exits without resuming xenbus or calling dpm_resume_end(). - xs_suspend()/xs_resume() and dpm_suspend_noirq()/dpm_resume_noirq() were not nested in the obvious way. Fix by ensuring each failure case goto's the correct label. Treat a failure of stop_machine() as a cancelled suspend in order to follow the correct resume path. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 7b69a1a..2fb7d39 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -86,32 +86,32 @@ static void do_suspend(void) err = freeze_processes(); if (err) { printk(KERN_ERR "xen suspend: freeze failed %d\n", err); - return; + goto out; } #endif err = dpm_suspend_start(PMSG_SUSPEND); if (err) { printk(KERN_ERR "xen suspend: dpm_suspend_start %d\n", err); - goto out; + goto out_thaw; } - printk(KERN_DEBUG "suspending xenstore...\n"); - xs_suspend(); - err = dpm_suspend_noirq(PMSG_SUSPEND); if (err) { printk(KERN_ERR "dpm_suspend_noirq failed: %d\n", err); - goto resume_devices; + goto out_resume; } + printk(KERN_DEBUG "suspending xenstore...\n"); + xs_suspend(); + err = stop_machine(xen_suspend, &cancelled, cpumask_of(0)); dpm_resume_noirq(PMSG_RESUME); if (err) { printk(KERN_ERR "failed to start xen_suspend: %d\n", err); - goto out; + cancelled = 1; } if (!cancelled) { @@ -120,15 +120,17 @@ static void do_suspend(void) } else xs_suspend_cancel(); -resume_devices: +out_resume: dpm_resume_end(PMSG_RESUME); /* Make sure timer events get retriggered on all CPUs */ clock_was_set(); -out: + +out_thaw: #ifdef CONFIG_PREEMPT thaw_processes(); #endif +out: shutting_down = SHUTDOWN_INVALID; } #endif /* CONFIG_PM_SLEEP */ -- cgit v0.10.2 From b4606f2165153833247823e8c04c5e88cb3d298b Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 1 Dec 2009 11:47:15 +0000 Subject: xen: explicitly create/destroy stop_machine workqueues outside suspend/resume region. I have observed cases where the implicit stop_machine_destroy() done by stop_machine() hangs while destroying the workqueues, specifically in kthread_stop(). This seems to be because timer ticks are not restarted until after stop_machine() returns. Fortunately stop_machine provides a facility to pre-create/post-destroy the workqueues so use this to ensure that workqueues are only destroyed after everything is really up and running again. I only actually observed this failure with 2.6.30. It seems that newer kernels are somehow more robust against doing kthread_stop() without timer interrupts (I tried some backports of some likely looking candidates but did not track down the commit which added this robustness). However this change seems like a reasonable belt&braces thing to do. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 2fb7d39..c499793 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -79,6 +79,12 @@ static void do_suspend(void) shutting_down = SHUTDOWN_SUSPEND; + err = stop_machine_create(); + if (err) { + printk(KERN_ERR "xen suspend: failed to setup stop_machine %d\n", err); + goto out; + } + #ifdef CONFIG_PREEMPT /* If the kernel is preemptible, we need to freeze all the processes to prevent them from being in the middle of a pagetable update @@ -86,7 +92,7 @@ static void do_suspend(void) err = freeze_processes(); if (err) { printk(KERN_ERR "xen suspend: freeze failed %d\n", err); - goto out; + goto out_destroy_sm; } #endif @@ -129,7 +135,11 @@ out_resume: out_thaw: #ifdef CONFIG_PREEMPT thaw_processes(); + +out_destroy_sm: #endif + stop_machine_destroy(); + out: shutting_down = SHUTDOWN_INVALID; } -- cgit v0.10.2 From 3d65c9488cadd2f11bd4d60c7266e639ece5d0d6 Mon Sep 17 00:00:00 2001 From: Gianluca Guida Date: Thu, 30 Jul 2009 22:54:36 +0100 Subject: Xen balloon: fix totalram_pages counting. Change totalram_pages when a single page is added/removed to the ballooned list. This avoid totalram_pages to be set erroneously to max_pfn at boot. Signed-off-by: Gianluca Guida Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index d31505b..6eb6265 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -136,6 +136,8 @@ static void balloon_append(struct page *page) list_add(&page->lru, &ballooned_pages); balloon_stats.balloon_low++; } + + totalram_pages--; } /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ @@ -156,6 +158,8 @@ static struct page *balloon_retrieve(void) else balloon_stats.balloon_low--; + totalram_pages++; + return page; } @@ -260,7 +264,6 @@ static int increase_reservation(unsigned long nr_pages) } balloon_stats.current_pages += nr_pages; - totalram_pages = balloon_stats.current_pages; out: spin_unlock_irqrestore(&balloon_lock, flags); @@ -323,7 +326,6 @@ static int decrease_reservation(unsigned long nr_pages) BUG_ON(ret != nr_pages); balloon_stats.current_pages -= nr_pages; - totalram_pages = balloon_stats.current_pages; spin_unlock_irqrestore(&balloon_lock, flags); @@ -422,7 +424,6 @@ static int __init balloon_init(void) pr_info("xen_balloon: Initialising balloon driver.\n"); balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn); - totalram_pages = balloon_stats.current_pages; balloon_stats.target_pages = balloon_stats.current_pages; balloon_stats.balloon_low = 0; balloon_stats.balloon_high = 0; -- cgit v0.10.2 From bc2c0303226ec716854d3c208c7f84fe7aa35cd7 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 5 Jun 2009 11:58:37 +0100 Subject: xen: try harder to balloon up under memory pressure. Currently if the balloon driver is unable to increase the guest's reservation it assumes the failure was due to reaching its full allocation, gives up on the ballooning operation and records the limit it reached as the "hard limit". The driver will not try again until the target is set again (even to the same value). However it is possible that ballooning has in fact failed due to memory pressure in the host and therefore it is desirable to keep attempting to reach the target in case memory becomes available. The most likely scenario is that some guests are ballooning down while others are ballooning up and therefore there is temporary memory pressure while things stabilise. You would not expect a well behaved toolstack to ask a domain to balloon to more than its allocation nor would you expect it to deliberately over-commit memory by setting balloon targets which exceed the total host memory. This patch drops the concept of a hard limit and causes the balloon driver to retry increasing the reservation on a timer in the same manner as when decreasing the reservation. Also if we partially succeed in increasing the reservation (i.e. receive less pages than we asked for) then we may as well keep those pages rather than returning them to Xen. Signed-off-by: Ian Campbell Cc: Stable Kernel diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 6eb6265..4204336 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -66,8 +66,6 @@ struct balloon_stats { /* We aim for 'current allocation' == 'target allocation'. */ unsigned long current_pages; unsigned long target_pages; - /* We may hit the hard limit in Xen. If we do then we remember it. */ - unsigned long hard_limit; /* * Drivers may alter the memory reservation independently, but they * must inform the balloon driver so we avoid hitting the hard limit. @@ -185,7 +183,7 @@ static void balloon_alarm(unsigned long unused) static unsigned long current_target(void) { - unsigned long target = min(balloon_stats.target_pages, balloon_stats.hard_limit); + unsigned long target = balloon_stats.target_pages; target = min(target, balloon_stats.current_pages + @@ -221,23 +219,10 @@ static int increase_reservation(unsigned long nr_pages) set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); - if (rc < nr_pages) { - if (rc > 0) { - int ret; - - /* We hit the Xen hard limit: reprobe. */ - reservation.nr_extents = rc; - ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, - &reservation); - BUG_ON(ret != rc); - } - if (rc >= 0) - balloon_stats.hard_limit = (balloon_stats.current_pages + rc - - balloon_stats.driver_pages); + if (rc < 0) goto out; - } - for (i = 0; i < nr_pages; i++) { + for (i = 0; i < rc; i++) { page = balloon_retrieve(); BUG_ON(page == NULL); @@ -263,12 +248,12 @@ static int increase_reservation(unsigned long nr_pages) __free_page(page); } - balloon_stats.current_pages += nr_pages; + balloon_stats.current_pages += rc; out: spin_unlock_irqrestore(&balloon_lock, flags); - return 0; + return rc < 0 ? rc : rc != nr_pages; } static int decrease_reservation(unsigned long nr_pages) @@ -369,7 +354,6 @@ static void balloon_process(struct work_struct *work) static void balloon_set_new_target(unsigned long target) { /* No need for lock. Not read-modify-write updates. */ - balloon_stats.hard_limit = ~0UL; balloon_stats.target_pages = target; schedule_work(&balloon_worker); } @@ -428,7 +412,6 @@ static int __init balloon_init(void) balloon_stats.balloon_low = 0; balloon_stats.balloon_high = 0; balloon_stats.driver_pages = 0UL; - balloon_stats.hard_limit = ~0UL; init_timer(&balloon_timer); balloon_timer.data = 0; @@ -473,9 +456,6 @@ module_exit(balloon_exit); BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low)); BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high)); -BALLOON_SHOW(hard_limit_kb, - (balloon_stats.hard_limit!=~0UL) ? "%lu\n" : "???\n", - (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0); BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages)); static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr, @@ -545,7 +525,6 @@ static struct attribute *balloon_info_attrs[] = { &attr_current_kb.attr, &attr_low_kb.attr, &attr_high_kb.attr, - &attr_hard_limit_kb.attr, &attr_driver_kb.attr, NULL }; -- cgit v0.10.2