From 4461bbc05bf11fa4251acded60e4645863a4158a Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Thu, 10 Apr 2014 12:17:09 -0400 Subject: x86/xen: Fix 32-bit PV guests's usage of kernel_stack Commit 198d208df4371734ac4728f69cb585c284d20a15 ("x86: Keep thread_info on thread stack in x86_32") made 32-bit kernels use kernel_stack to point to thread_info. That change missed a couple of updates needed by Xen's 32-bit PV guests: 1. kernel_stack needs to be initialized for secondary CPUs 2. GET_THREAD_INFO() now uses %fs register which may not be the kernel's version when executing xen_iret(). With respect to the second issue, we don't need GET_THREAD_INFO() anymore: we used it as an intermediate step to get to per_cpu xen_vcpu and avoid referencing %fs. Now that we are going to use %fs anyway we may as well go directly to xen_vcpu. Signed-off-by: Boris Ostrovsky Signed-off-by: David Vrabel diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index a18eadd..7005974 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -441,10 +441,11 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) irq_ctx_init(cpu); #else clear_tsk_thread_flag(idle, TIF_FORK); +#endif per_cpu(kernel_stack, cpu) = (unsigned long)task_stack_page(idle) - KERNEL_STACK_OFFSET + THREAD_SIZE; -#endif + xen_setup_runstate_info(cpu); xen_setup_timer(cpu); xen_init_lock_cpu(cpu); diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 33ca6e4..fd92a64 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -75,6 +75,17 @@ ENDPROC(xen_sysexit) * stack state in whatever form its in, we keep things simple by only * using a single register which is pushed/popped on the stack. 
*/ + +.macro POP_FS +1: + popw %fs +.pushsection .fixup, "ax" +2: movw $0, (%esp) + jmp 1b +.popsection + _ASM_EXTABLE(1b,2b) +.endm + ENTRY(xen_iret) /* test eflags for special cases */ testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp) @@ -83,15 +94,13 @@ ENTRY(xen_iret) push %eax ESP_OFFSET=4 # bytes pushed onto stack - /* - * Store vcpu_info pointer for easy access. Do it this way to - * avoid having to reload %fs - */ + /* Store vcpu_info pointer for easy access */ #ifdef CONFIG_SMP - GET_THREAD_INFO(%eax) - movl %ss:TI_cpu(%eax), %eax - movl %ss:__per_cpu_offset(,%eax,4), %eax - mov %ss:xen_vcpu(%eax), %eax + pushw %fs + movl $(__KERNEL_PERCPU), %eax + movl %eax, %fs + movl %fs:xen_vcpu, %eax + POP_FS #else movl %ss:xen_vcpu, %eax #endif -- cgit v0.10.2 From cea37f87519ca3172a4e8ddd3ffcd2b4232b341f Mon Sep 17 00:00:00 2001 From: Daeseok Youn Date: Tue, 1 Apr 2014 19:15:59 +0900 Subject: xen: fix memory leak in __xen_pcibk_add_pci_dev() It need to free dev_entry when it failed to assign to a new slot on the virtual PCI bus. smatch says: drivers/xen/xen-pciback/vpci.c:142 __xen_pcibk_add_pci_dev() warn: possible memory leak of 'dev_entry' Signed-off-by: Daeseok Youn Signed-off-by: David Vrabel diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c index 3165ce3..51afff9 100644 --- a/drivers/xen/xen-pciback/vpci.c +++ b/drivers/xen/xen-pciback/vpci.c @@ -137,6 +137,8 @@ unlock: /* Publish this device. */ if (!err) err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid); + else + kfree(dev_entry); out: return err; -- cgit v0.10.2 From c0914e61660fa7d501ef9394b26f4847ef3dc98e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Mar 2014 11:24:59 +0300 Subject: xen-pciback: silence an unwanted debug printk There is a missing curly brace here so we might print some extra debug information. 
Signed-off-by: Dan Carpenter Signed-off-by: David Vrabel diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 929dd46..607e414 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -217,7 +217,7 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, if (result == 0) { for (i = 0; i < op->value; i++) { op->msix_entries[i].entry = entries[i].entry; - if (entries[i].vector) + if (entries[i].vector) { op->msix_entries[i].vector = xen_pirq_from_irq(entries[i].vector); if (unlikely(verbose_request)) @@ -225,6 +225,7 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, "MSI-X[%d]: %d\n", pci_name(dev), i, op->msix_entries[i].vector); + } } } else pr_warn_ratelimited("%s: error enabling MSI-X for guest %u: err %d!\n", -- cgit v0.10.2 From e0fc17a936334c08b2729fff87168c03fdecf5b6 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 4 Apr 2014 14:48:04 -0400 Subject: xen/spinlock: Don't enable them unconditionally. The git commit a945928ea2709bc0e8e8165d33aed855a0110279 ('xen: Do not enable spinlocks before jump_label_init() has executed') was added to deal with the jump machinery. Earlier the code that turned on the jump label was only called by Xen specific functions. But now that it had been moved to the initcall machinery it gets called on Xen, KVM, and baremetal - ouch!. And the detection machinery to only call it on Xen wasn't remembered in the heat of merge window excitement. This means that the slowpath is enabled on baremetal while it should not be. 
Reported-by: Waiman Long Acked-by: Steven Rostedt CC: stable@vger.kernel.org CC: Boris Ostrovsky Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: David Vrabel diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 4d3acc3..0ba5f3b 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -274,7 +274,7 @@ void __init xen_init_spinlocks(void) printk(KERN_DEBUG "xen: PV spinlocks disabled\n"); return; } - + printk(KERN_DEBUG "xen: PV spinlocks enabled\n"); pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); pv_lock_ops.unlock_kick = xen_unlock_kick; } @@ -290,6 +290,9 @@ static __init int xen_init_spinlocks_jump(void) if (!xen_pvspin) return 0; + if (!xen_domain()) + return 0; + static_key_slow_inc(&paravirt_ticketlocks_enabled); return 0; } -- cgit v0.10.2 From 027bd7e89906a076225b23d1ca4b6702c84e72dc Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 4 Apr 2014 14:53:40 -0400 Subject: xen/xenbus: Avoid synchronous wait on XenBus stalling shutdown/restart. The 'read_reply' works with 'process_msg' to read a reply in XenBus. 'process_msg' is running from within the 'xenbus' thread. Whenever a message shows up in XenBus it is put on a xs_state.reply_list list and 'read_reply' picks it up. The problem is if the backend domain or the xenstored process is killed. In which case 'xenbus' is still awaiting - and 'read_reply', if called, is stuck forever waiting for the reply_list to have some contents. This is normally not a problem - as the backend domain can come back or the xenstored process can be restarted. However if the domain is in process of being powered off/restarted/halted - there is no point of waiting on it coming back - as we are effectively being terminated and should not impede the progress. This patch solves this problem by checking whether the guest is the right domain. If it is an initial domain and hurtling towards death - there is no point of continuing the wait.
All other type of guests continue with their behavior (as Xenstore is expected to still be running in another domain). Fixes-Bug: http://bugs.xenproject.org/xen/bug/8 Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Boris Ostrovsky Reviewed-by: David Vrabel Signed-off-by: David Vrabel diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index b6d5fff..ba804f3 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -50,6 +50,7 @@ #include #include #include "xenbus_comms.h" +#include "xenbus_probe.h" struct xs_stored_msg { struct list_head list; @@ -139,6 +140,29 @@ static int get_error(const char *errorstring) return xsd_errors[i].errnum; } +static bool xenbus_ok(void) +{ + switch (xen_store_domain_type) { + case XS_LOCAL: + switch (system_state) { + case SYSTEM_POWER_OFF: + case SYSTEM_RESTART: + case SYSTEM_HALT: + return false; + default: + break; + } + return true; + case XS_PV: + case XS_HVM: + /* FIXME: Could check that the remote domain is alive, + * but it is normally initial domain. */ + return true; + default: + break; + } + return false; +} static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len) { struct xs_stored_msg *msg; @@ -148,9 +172,20 @@ static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len) while (list_empty(&xs_state.reply_list)) { spin_unlock(&xs_state.reply_lock); - /* XXX FIXME: Avoid synchronous wait for response here. */ - wait_event(xs_state.reply_waitq, - !list_empty(&xs_state.reply_list)); + if (xenbus_ok()) + /* XXX FIXME: Avoid synchronous wait for response here. */ + wait_event_timeout(xs_state.reply_waitq, + !list_empty(&xs_state.reply_list), + msecs_to_jiffies(500)); + else { + /* + * If we are in the process of being shut-down there is + * no point of trying to contact XenBus - it is either + * killed (xenstored application) or the other domain + * has been killed or is unreachable. 
+ */ + return ERR_PTR(-EIO); + } spin_lock(&xs_state.reply_lock); } @@ -215,6 +250,9 @@ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) mutex_unlock(&xs_state.request_mutex); + if (IS_ERR(ret)) + return ret; + if ((msg->type == XS_TRANSACTION_END) || ((req_msg.type == XS_TRANSACTION_START) && (msg->type == XS_ERROR))) -- cgit v0.10.2 From eb47f71200b7d5b4c8c1f8c75675f592d855aafd Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 4 Apr 2014 14:53:41 -0400 Subject: xen/manage: Poweroff forcefully if user-space is not yet up. The user can launch the guest in this sequence: xl create -p /vm.cfg [launch, but pause it] xl shutdown latest [sets control/shutdown=poweroff] xl unpause latest xl console latest [and see that the guest has completely ignored the shutdown request] In reality the guest hasn't ignored it. It registers a watch and gets a notification that there is a value. It then calls the shutdown_handler which ends up calling orderly_poweroff. Unfortunately that is so early in the bootup that there is no user-space. Which means that the orderly_poweroff fails. But since the force flag was set to false it continues on without reporting. What we really want is to use the force when we are in the SYSTEM_BOOTING state and not use the 'force' when SYSTEM_RUNNING. However, if we are in the running state - and the shutdown command has been given before the user-space has been set up, there is nothing we can do. Worse yet, we then ignore any further 'xl shutdown' requests! As such, the other part of this patch is to only start ignoring the 'xl shutdown' requests when we are truly in the power off sequence. That means the user can do multiple 'xl shutdown' and we will try to act on them instead of ignoring them.
Fixes-Bug: http://bugs.xenproject.org/xen/bug/6 Reported-by: Alex Bligh Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: David Vrabel diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index fc6c94c..32f9236 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -198,10 +198,32 @@ struct shutdown_handler { void (*cb)(void); }; +static int poweroff_nb(struct notifier_block *cb, unsigned long code, void *unused) +{ + switch (code) { + case SYS_DOWN: + case SYS_HALT: + case SYS_POWER_OFF: + shutting_down = SHUTDOWN_POWEROFF; + default: + break; + } + return NOTIFY_DONE; +} static void do_poweroff(void) { - shutting_down = SHUTDOWN_POWEROFF; - orderly_poweroff(false); + switch (system_state) { + case SYSTEM_BOOTING: + orderly_poweroff(true); + break; + case SYSTEM_RUNNING: + orderly_poweroff(false); + break; + default: + /* Don't do it when we are halting/rebooting. */ + pr_info("Ignoring Xen toolstack shutdown.\n"); + break; + } } static void do_reboot(void) @@ -307,6 +329,10 @@ static struct xenbus_watch shutdown_watch = { .callback = shutdown_handler }; +static struct notifier_block xen_reboot_nb = { + .notifier_call = poweroff_nb, +}; + static int setup_shutdown_watcher(void) { int err; @@ -317,6 +343,7 @@ static int setup_shutdown_watcher(void) return err; } + #ifdef CONFIG_MAGIC_SYSRQ err = register_xenbus_watch(&sysrq_watch); if (err) { @@ -345,6 +372,7 @@ int xen_setup_shutdown_event(void) if (!xen_domain()) return -ENODEV; register_xenstore_notifier(&xenstore_notifier); + register_reboot_notifier(&xen_reboot_nb); return 0; } -- cgit v0.10.2