From e3cc067b0a79d3a3672bfe7cfba12f2e8ae56039 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 18 Sep 2009 16:31:22 -0700 Subject: xen/evtchn: track enabled state for each port enable/disable_irq() complain if the enables/disables are unbalanced, so keep track of the state and avoid redundant enables. Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index af03195..4356a9a 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -69,10 +69,36 @@ struct per_user_data { const char *name; }; -/* Who's bound to each port? */ -static struct per_user_data *port_user[NR_EVENT_CHANNELS]; +/* + * Who's bound to each port? This is logically an array of struct + * per_user_data *, but we encode the current enabled-state in bit 0. + */ +static unsigned long port_user[NR_EVENT_CHANNELS]; static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */ +static inline struct per_user_data *get_port_user(unsigned port) +{ + return (struct per_user_data *)(port_user[port] & ~1); +} + +static inline void set_port_user(unsigned port, struct per_user_data *u) +{ + port_user[port] = (unsigned long)u; +} + +static inline bool get_port_enabled(unsigned port) +{ + return port_user[port] & 1; +} + +static inline void set_port_enabled(unsigned port, bool enabled) +{ + if (enabled) + port_user[port] |= 1; + else + port_user[port] &= ~1; +} + irqreturn_t evtchn_interrupt(int irq, void *data) { unsigned int port = (unsigned long)data; @@ -80,9 +106,15 @@ irqreturn_t evtchn_interrupt(int irq, void *data) spin_lock(&port_user_lock); - u = port_user[port]; + u = get_port_user(port); + + if (WARN(!get_port_enabled(port), + "Interrupt for port %d, but apparently not enabled; per-user %p\n", + port, u)) + goto out; disable_irq_nosync(irq); + set_port_enabled(port, false); if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; @@ -92,10 +124,10 @@ irqreturn_t evtchn_interrupt(int irq, void *data) kill_fasync(&u->evtchn_async_queue, SIGIO, POLL_IN); } - } else { + } else u->ring_overflow = 1; - } +out: spin_unlock(&port_user_lock); return IRQ_HANDLED; @@ -198,9 +230,18 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, goto out; spin_lock_irq(&port_user_lock); - for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) - if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u)) - enable_irq(irq_from_evtchn(kbuf[i])); + + for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) { + unsigned port = kbuf[i]; + + if (port < NR_EVENT_CHANNELS && + get_port_user(port) == u && + !get_port_enabled(port)) { + set_port_enabled(port, true); + enable_irq(irq_from_evtchn(port)); + } + } + spin_unlock_irq(&port_user_lock); rc = count; @@ -222,8 +263,8 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) * interrupt handler yet, and our caller has already * serialized bind operations.) */ - BUG_ON(port_user[port] != NULL); - port_user[port] = u; + BUG_ON(get_port_user(port) != NULL); + set_port_user(port, u); rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, u->name, (void *)(unsigned long)port); @@ -242,7 +283,7 @@ static void evtchn_unbind_from_user(struct per_user_data *u, int port) /* make sure we unbind the irq handler before clearing the port */ barrier(); - port_user[port] = NULL; + set_port_user(port, NULL); } static long evtchn_ioctl(struct file *file, @@ -333,7 +374,7 @@ static long evtchn_ioctl(struct file *file, spin_lock_irq(&port_user_lock); rc = -ENOTCONN; - if (port_user[unbind.port] != u) { + if (get_port_user(unbind.port) != u) { spin_unlock_irq(&port_user_lock); break; } @@ -355,7 +396,7 @@ static long evtchn_ioctl(struct file *file, if (notify.port >= NR_EVENT_CHANNELS) { rc = -EINVAL; - } else if (port_user[notify.port] != u) { + } else if (get_port_user(notify.port) != u) { rc = -ENOTCONN; } else { notify_remote_via_evtchn(notify.port); @@ -444,10 +485,10 @@ static int evtchn_release(struct inode *inode, struct file *filp) free_page((unsigned long)u->ring); for (i = 0; i < NR_EVENT_CHANNELS; i++) { - if (port_user[i] != u) + if (get_port_user(i) != u) continue; - evtchn_unbind_from_user(port_user[i], i); + evtchn_unbind_from_user(get_port_user(i), i); } spin_unlock_irq(&port_user_lock); -- cgit v0.10.2 From 93afe0b75ef3675ca05320919a57de8b9bbb159c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 18 Sep 2009 16:36:58 -0700 Subject: xen/evtchn: dynamically allocate port_user array We only need the array when running as a Xen domain, so dynamically allocate it as needed to save on bss space. Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 4356a9a..709c32d 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -73,7 +73,7 @@ struct per_user_data { * Who's bound to each port? This is logically an array of struct * per_user_data *, but we encode the current enabled-state in bit 0. */ -static unsigned long port_user[NR_EVENT_CHANNELS]; +static unsigned long *port_user; static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */ static inline struct per_user_data *get_port_user(unsigned port) @@ -522,8 +522,11 @@ static int __init evtchn_init(void) if (!xen_domain()) return -ENODEV; + port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL); + if (port_user == NULL) + return -ENOMEM; + spin_lock_init(&port_user_lock); - memset(port_user, 0, sizeof(port_user)); /* Create '/dev/misc/evtchn'. */ err = misc_register(&evtchn_miscdev); @@ -539,6 +542,9 @@ static int __init evtchn_init(void) static void __exit evtchn_cleanup(void) { + kfree(port_user); + port_user = NULL; + misc_deregister(&evtchn_miscdev); } -- cgit v0.10.2 From 0edce91dcd83160019867a00746c679344dc0bbd Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 18 Sep 2009 16:55:29 -0700 Subject: xen/evtchn: ports start enabled Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 709c32d..72dc7f2 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -108,10 +108,9 @@ irqreturn_t evtchn_interrupt(int irq, void *data) u = get_port_user(port); - if (WARN(!get_port_enabled(port), - "Interrupt for port %d, but apparently not enabled; per-user %p\n", - port, u)) - goto out; + WARN(!get_port_enabled(port), + "Interrupt for port %d, but apparently not enabled; per-user %p\n", + port, u); disable_irq_nosync(irq); set_port_enabled(port, false); @@ -127,7 +126,6 @@ irqreturn_t evtchn_interrupt(int irq, void *data) } else u->ring_overflow = 1; -out: spin_unlock(&port_user_lock); return IRQ_HANDLED; @@ -265,6 +263,7 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) */ BUG_ON(get_port_user(port) != NULL); set_port_user(port, u); + set_port_enabled(port, true); /* start enabled */ rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, u->name, (void *)(unsigned long)port); -- cgit v0.10.2 From 1a1a17cddbfb1f81222b3f18ee8530c41fbc3b82 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 18 Sep 2009 17:13:41 -0700 Subject: xen/evtchn: remove spurious barrier evtchn_unbind_from_user() is called under a lock, so there's no need to worry about the ordering of unbind_from_irqhandler vs clearing the port per-user data. Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 72dc7f2..f79ac5c 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -279,9 +279,6 @@ static void evtchn_unbind_from_user(struct per_user_data *u, int port) unbind_from_irqhandler(irq, (void *)(unsigned long)port); - /* make sure we unbind the irq handler before clearing the port */ - barrier(); - set_port_user(port, NULL); } -- cgit v0.10.2 From 3f5e554f669098c84c82ce75e7577f7e0f3fccde Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 28 May 2010 15:28:27 -0700 Subject: xen/evtchn: don't do unbind_from_irqhandler under spinlock unbind_from_irqhandler can end up doing /proc operations, which can't happen under a spinlock. So before removing the IRQ handler, disable the irq under the port_user lock (masking the underlying event channel and making sure the irq handler isn't running concurrently and won't start running), then remove the handler without the lock. Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index f79ac5c..6a3a129 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -375,10 +375,12 @@ static long evtchn_ioctl(struct file *file, break; } - evtchn_unbind_from_user(u, unbind.port); + disable_irq(irq_from_evtchn(unbind.port)); spin_unlock_irq(&port_user_lock); + evtchn_unbind_from_user(u, unbind.port); + rc = 0; break; } @@ -484,11 +486,18 @@ static int evtchn_release(struct inode *inode, struct file *filp) if (get_port_user(i) != u) continue; - evtchn_unbind_from_user(get_port_user(i), i); + disable_irq(irq_from_evtchn(i)); } spin_unlock_irq(&port_user_lock); + for (i = 0; i < NR_EVENT_CHANNELS; i++) { + if (get_port_user(i) != u) + continue; + + evtchn_unbind_from_user(get_port_user(i), i); + } + kfree(u->name); kfree(u); -- cgit v0.10.2 From 376d908f52427591cef4acd172db9c3ef28676ec Mon Sep 17 00:00:00 2001 From: Bastian Blank Date: Fri, 28 May 2010 15:43:49 -0700 Subject: xen/evtchn: Fix name of Xen event-channel device The Xen event-channel device is named evtchn in the kernel but always used as /dev/xen/evtchn in userspace. This patch fixes the name. Signed-off-by: Bastian Blank Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 6a3a129..68119f6 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -517,7 +517,7 @@ static const struct file_operations evtchn_fops = { static struct miscdevice evtchn_miscdev = { .minor = MISC_DYNAMIC_MINOR, - .name = "evtchn", + .name = "xen/evtchn", .fops = &evtchn_fops, }; static int __init evtchn_init(void) -- cgit v0.10.2 From 70697d540c0598ad023a391d4c895044db9a6624 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 5 Oct 2010 11:13:44 -0700 Subject: xen/evtchn: add missing static Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 68119f6..f3594ec 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -99,7 +99,7 @@ static inline void set_port_enabled(unsigned port, bool enabled) port_user[port] &= ~1; } -irqreturn_t evtchn_interrupt(int irq, void *data) +static irqreturn_t evtchn_interrupt(int irq, void *data) { unsigned int port = (unsigned long)data; struct per_user_data *u; -- cgit v0.10.2 From 313e74412105c670ff8900ec8099a3a5df1fa83c Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Thu, 28 Oct 2010 15:39:02 +0400 Subject: xen: xenfs: privcmd: check put_user() return code put_user() may fail. In this case propagate error code from privcmd_ioctl_mmap_batch(). Signed-off-by: Vasiliy Kulikov Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c index f80be7f..2eb04c8 100644 --- a/drivers/xen/xenfs/privcmd.c +++ b/drivers/xen/xenfs/privcmd.c @@ -266,9 +266,7 @@ static int mmap_return_errors(void *data, void *state) xen_pfn_t *mfnp = data; struct mmap_batch_state *st = state; - put_user(*mfnp, st->user++); - - return 0; + return put_user(*mfnp, st->user++); } static struct vm_operations_struct privcmd_vm_ops; @@ -323,10 +321,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata) up_write(&mm->mmap_sem); if (state.err > 0) { - ret = 0; - state.user = m.arr; - traverse_pages(m.num, sizeof(xen_pfn_t), + ret = traverse_pages(m.num, sizeof(xen_pfn_t), &pagelist, mmap_return_errors, &state); } -- cgit v0.10.2 From e060e7af98182494b764d002eba7fa022fe91bdf Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 11 Nov 2010 12:37:43 -0800 Subject: xen: set vma flag VM_PFNMAP in the privcmd mmap file_op Set VM_PFNMAP in the privcmd mmap file_op, rather than later in xen_remap_domain_mfn_range when it is too late because vma_wants_writenotify has already been called and vm_page_prot has already been modified. Signed-off-by: Stefano Stabellini Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f08ea04..792de434 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2299,7 +2299,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); - vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; + BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == + (VM_PFNMAP | VM_RESERVED | VM_IO))); rmd.mfn = mfn; rmd.prot = prot; diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c index 2eb04c8..88474d4 100644 --- a/drivers/xen/xenfs/privcmd.c +++ b/drivers/xen/xenfs/privcmd.c @@ -380,8 +380,9 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) if (xen_feature(XENFEAT_auto_translated_physmap)) return -ENOSYS; - /* DONTCOPY is essential for Xen as copy_page_range is broken. */ - vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY; + /* DONTCOPY is essential for Xen because copy_page_range doesn't know + * how to recreate these mappings */ + vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP; vma->vm_ops = &privcmd_vm_ops; vma->vm_private_data = NULL; -- cgit v0.10.2 From 7e77506a5918d82cafa2ffa783ab57c23f9e9817 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 30 Sep 2010 12:37:26 +0100 Subject: xen: implement XENMEM_machphys_mapping This hypercall allows Xen to specify a non-default location for the machine to physical mapping. This capability is used when running a 32 bit domain 0 on a 64 bit hypervisor to shrink the hypervisor hole to exactly the size required. [ Impact: add Xen hypercall definitions ] Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Stefano Stabellini diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index e8506c1..1c10c88 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -61,9 +61,9 @@ DEFINE_GUEST_HANDLE(void); #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) #endif -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT) /* Maximum number of virtual CPUs in multi-processor guests. */ #define MAX_VIRT_CPUS 32 diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h index 42a7e00..8413688 100644 --- a/arch/x86/include/asm/xen/interface_32.h +++ b/arch/x86/include/asm/xen/interface_32.h @@ -32,6 +32,11 @@ /* And the trap vector is... */ #define TRAP_INSTR "int $0x82" +#define __MACH2PHYS_VIRT_START 0xF5800000 +#define __MACH2PHYS_VIRT_END 0xF6800000 + +#define __MACH2PHYS_SHIFT 2 + /* * Virtual addresses beyond this are not modifiable by guest OSes. The * machine->physical mapping table starts at this address, read-only. diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h index 100d266..839a481 100644 --- a/arch/x86/include/asm/xen/interface_64.h +++ b/arch/x86/include/asm/xen/interface_64.h @@ -39,18 +39,7 @@ #define __HYPERVISOR_VIRT_END 0xFFFF880000000000 #define __MACH2PHYS_VIRT_START 0xFFFF800000000000 #define __MACH2PHYS_VIRT_END 0xFFFF804000000000 - -#ifndef HYPERVISOR_VIRT_START -#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) -#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) -#endif - -#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) -#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) -#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif +#define __MACH2PHYS_SHIFT 3 /* * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index dd8c141..8760cc6 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,8 @@ typedef struct xpaddr { #define MAX_DOMAIN_PAGES \ ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) +extern unsigned long *machine_to_phys_mapping; +extern unsigned int machine_to_phys_order; extern unsigned long get_phys_to_machine(unsigned long pfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); @@ -69,10 +72,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; -#if 0 if (unlikely((mfn >> machine_to_phys_order) != 0)) - return max_mapnr; -#endif + return ~0; pfn = 0; /* diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 235c0f4..bd35549 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -75,6 +75,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); enum xen_domain_type xen_domain_type = XEN_NATIVE; EXPORT_SYMBOL_GPL(xen_domain_type); +unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; +EXPORT_SYMBOL(machine_to_phys_mapping); +unsigned int machine_to_phys_order; +EXPORT_SYMBOL(machine_to_phys_order); + struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); @@ -1097,6 +1102,8 @@ asmlinkage void __init xen_start_kernel(void) xen_domain_type = XEN_PV_DOMAIN; + xen_setup_machphys_mapping(); + /* Install Xen paravirt ops */ pv_info = xen_info; pv_init_ops = xen_init_ops; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 21ed8d7..bd2713a 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2034,6 +2034,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) set_page_prot(pmd, PAGE_KERNEL_RO); } +void __init xen_setup_machphys_mapping(void) +{ + struct xen_machphys_mapping mapping; + unsigned long machine_to_phys_nr_ents; + + if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { + machine_to_phys_mapping = (unsigned long *)mapping.v_start; + machine_to_phys_nr_ents = mapping.max_mfn + 1; + } else { + machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; + } + machine_to_phys_order = fls(machine_to_phys_nr_ents - 1); +} + #ifdef CONFIG_X86_64 static void convert_pfn_mfn(void *v) { diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index d7a6c13..eac3ce1 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -141,6 +141,19 @@ struct xen_machphys_mfn_list { DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list); /* + * Returns the location in virtual address space of the machine_to_phys + * mapping table. Architectures which do not have a m2p table, or which do not + * map it by default into guest address space, do not implement this command. + * arg == addr of xen_machphys_mapping_t. + */ +#define XENMEM_machphys_mapping 12 +struct xen_machphys_mapping { + unsigned long v_start, v_end; /* Start and end virtual addresses. */ + unsigned long max_mfn; /* Maximum MFN that can be looked up. */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t); + +/* * Sets the GPFN at which a particular page appears in the specified guest's * pseudophysical address space. * arg == addr of xen_add_to_physmap_t. -- cgit v0.10.2 From 744f9f104ea262de1dc3e29265870c649f0d9473 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 13 Nov 2010 11:44:39 -0500 Subject: xen: fix header export to userspace scripts/headers_install.pl prevents "__user" from being exported to userspace headers, so just use compiler.h to make sure that __user is defined and avoid the error. unifdef: linux-next-20101112/xx64/usr/include/xen/privcmd.h.tmp: 79: Premature EOF (#if line 33 depth 1) Signed-off-by: Randy Dunlap Cc: Jeremy Fitzhardinge Cc: Konrad Rzeszutek Wilk Cc: xen-devel@lists.xensource.com (moderated for non-subscribers) Cc: virtualization@lists.osdl.org Cc: Tony Finch Signed-off-by: Jeremy Fitzhardinge diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h index b42cdfd..17857fb 100644 --- a/include/xen/privcmd.h +++ b/include/xen/privcmd.h @@ -34,13 +34,10 @@ #define __LINUX_PUBLIC_PRIVCMD_H__ #include +#include typedef unsigned long xen_pfn_t; -#ifndef __user -#define __user -#endif - struct privcmd_hypercall { __u64 op; __u64 arg[5]; -- cgit v0.10.2 From fe61f1d737f7804e0bd440ace9724e669e2c2906 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 16 Nov 2010 11:06:46 -0800 Subject: xen/xenfs: update xenfs_mount for new prototype .mount now returns a struct dentry *. Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index f6339d1..990ee42 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -121,9 +121,9 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent) return rc; } -static int xenfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static struct dentry *xenfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data) { return mount_single(fs_type, flags, data, xenfs_fill_super); } -- cgit v0.10.2 From 1c6969ec8e6328e8d288fc585310e9e52fc9db04 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 16 Nov 2010 14:55:33 -0800 Subject: xen/evtchn: clear secondary CPUs' cpu_evtchn_mask[] after restore To bind all event channels to CPU#0, it is not sufficient to set all of its cpu_evtchn_mask[] bits; all other CPUs also need to get their bits cleared. Otherwise, evtchn_do_upcall() will start handling interrupts on CPUs they're not intended to run on, which can be particularly bad for per-CPU ones. [ linux-2.6.18-xen.hg 7de7453dee36 ] Signed-off-by: Jan Beulich Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 321a0c8..d770b8c 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -286,9 +286,9 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) static void init_evtchn_cpu_bindings(void) { + int i; #ifdef CONFIG_SMP struct irq_desc *desc; - int i; /* By default all event channels notify CPU#0. */ for_each_irq_desc(i, desc) { @@ -296,7 +296,10 @@ static void init_evtchn_cpu_bindings(void) } #endif - memset(cpu_evtchn_mask(0), ~0, sizeof(struct cpu_evtchn_s)); + for_each_possible_cpu(i) + memset(cpu_evtchn_mask(i), + (i == 0) ? ~0 : 0, sizeof(struct cpu_evtchn_s)); + } static inline void clear_evtchn(int port) -- cgit v0.10.2 From e04195644eea7c6c14007922257704ec67156cd1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 16 Nov 2010 14:56:47 -0800 Subject: xen/events: use locked set|clear_bit() for cpu_evtchn_mask The per-cpu event channel masks can be updated unlocked from multiple CPUs, so use the locked variant. Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/events.c b/drivers/xen/events.c index d770b8c..d6d4f76 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -278,8 +278,8 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu)); #endif - __clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq))); - __set_bit(chn, cpu_evtchn_mask(cpu)); + clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq))); + set_bit(chn, cpu_evtchn_mask(cpu)); irq_info[irq].cpu = cpu; } -- cgit v0.10.2 From 9045d47ea362e6a3727ee3f1b69a1b656976772e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 18 Nov 2010 17:14:46 -0800 Subject: Revert "xen/privcmd: create address space to allow writable mmaps" This reverts commit 24a89b5be4cf2b7f1b49b56b6cb4a7b71fccf241. We should no longer need an address space now that we're correctly setting VM_PFNMAP on our vmas. Conflicts: drivers/xen/xenfs/super.c Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index 990ee42..1aa3897 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -12,8 +12,6 @@ #include #include #include -#include -#include #include @@ -24,28 +22,12 @@ MODULE_DESCRIPTION("Xen filesystem"); MODULE_LICENSE("GPL"); -static int xenfs_set_page_dirty(struct page *page) -{ - return !TestSetPageDirty(page); -} - -static const struct address_space_operations xenfs_aops = { - .set_page_dirty = xenfs_set_page_dirty, -}; - -static struct backing_dev_info xenfs_backing_dev_info = { - .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, -}; - static struct inode *xenfs_make_inode(struct super_block *sb, int mode) { struct inode *ret = new_inode(sb); if (ret) { ret->i_mode = mode; - ret->i_mapping->a_ops = &xenfs_aops; - ret->i_mapping->backing_dev_info = &xenfs_backing_dev_info; ret->i_uid = ret->i_gid = 0; ret->i_blocks = 0; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; @@ -137,25 +119,11 @@ static struct file_system_type xenfs_type = { static int __init xenfs_init(void) { - int err; - if (!xen_domain()) { - printk(KERN_INFO "xenfs: not registering filesystem on non-xen platform\n"); - return 0; - } - - err = register_filesystem(&xenfs_type); - if (err) { - printk(KERN_ERR "xenfs: Unable to register filesystem!\n"); - goto out; - } - - err = bdi_init(&xenfs_backing_dev_info); - if (err) - unregister_filesystem(&xenfs_type); - - out: + if (xen_domain()) + return register_filesystem(&xenfs_type); - return err; + printk(KERN_INFO "XENFS: not registering filesystem on non-xen platform\n"); + return 0; } static void __exit xenfs_exit(void) -- cgit v0.10.2 From bc7fc5e33e1a093e5f9e196595843bb096471586 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 18 Nov 2010 22:32:17 -0800 Subject: xen/evtchn: the evtchn device is non-seekable Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index fec6ba3..dd8e5e0 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -431,7 +431,7 @@ static int evtchn_open(struct inode *inode, struct file *filp) filp->private_data = u; - return 0; + return nonseekable_open(inode, filp);; } static int evtchn_release(struct inode *inode, struct file *filp) @@ -467,7 +467,7 @@ static const struct file_operations evtchn_fops = { .fasync = evtchn_fasync, .open = evtchn_open, .release = evtchn_release, - .llseek = noop_llseek, + .llseek = no_llseek, }; static struct miscdevice evtchn_miscdev = { -- cgit v0.10.2 From b5d827b641b192ceb6968c21feb544c744e43108 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 7 Dec 2009 17:10:27 -0800 Subject: xen: make evtchn's name less generic Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index eb8a78d..533a199 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -8,9 +8,12 @@ obj-$(CONFIG_BLOCK) += biomerge.o obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += balloon.o -obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o +obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o obj-$(CONFIG_XEN_DOM0) += pci.o + +xen-evtchn-y := evtchn.o + -- cgit v0.10.2 From 9be4d4575906af9698de660e477f949a076c87e1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 31 Aug 2010 15:01:16 -0700 Subject: xen: add extra pages to balloon Add extra pages in the pseudo-physical address space to the balloon so we can extend into them later. Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 500290b..df26ee9 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -119,7 +119,7 @@ static void scrub_page(struct page *page) } /* balloon_append: add the given page to the balloon. */ -static void balloon_append(struct page *page) +static void __balloon_append(struct page *page) { /* Lowmem is re-populated first, so highmem pages go at list tail. */ if (PageHighMem(page)) { @@ -130,7 +130,11 @@ static void balloon_append(struct page *page) list_add(&page->lru, &ballooned_pages); balloon_stats.balloon_low++; } +} +static void balloon_append(struct page *page) +{ + __balloon_append(page); totalram_pages--; } @@ -416,10 +420,13 @@ static int __init balloon_init(void) register_balloon(&balloon_sysdev); /* Initialise the balloon with excess memory space. */ - for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { + for (pfn = PFN_UP(xen_extra_mem_start); + pfn < PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size); + pfn++) { page = pfn_to_page(pfn); - if (!PageReserved(page)) - balloon_append(page); + /* totalram_pages doesn't include the boot-time + balloon extension, so don't subtract from it. */ + __balloon_append(page); } target_watch.callback = watch_target; diff --git a/include/xen/page.h b/include/xen/page.h index eaf85fa..0be36b9 100644 --- a/include/xen/page.h +++ b/include/xen/page.h @@ -1 +1,8 @@ +#ifndef _XEN_PAGE_H +#define _XEN_PAGE_H + #include + +extern phys_addr_t xen_extra_mem_start, xen_extra_mem_size; + +#endif /* _XEN_PAGE_H */ -- cgit v0.10.2 From 2f70e0acd496398671606767122846278126a88b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 2 Sep 2010 23:11:17 -0700 Subject: xen/balloon: the balloon_lock is useless The balloon_lock is useless, since it protects nothing against nothing. Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index df26ee9..77b5dc3 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -195,7 +195,7 @@ static unsigned long current_target(void) static int increase_reservation(unsigned long nr_pages) { - unsigned long pfn, i, flags; + unsigned long pfn, i; struct page *page; long rc; struct xen_memory_reservation reservation = { @@ -207,8 +207,6 @@ static int increase_reservation(unsigned long nr_pages) if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); - spin_lock_irqsave(&xen_reservation_lock, flags); - page = balloon_first_page(); for (i = 0; i < nr_pages; i++) { BUG_ON(page == NULL); @@ -251,14 +249,12 @@ static int increase_reservation(unsigned long nr_pages) balloon_stats.current_pages += rc; out: - spin_unlock_irqrestore(&xen_reservation_lock, flags); - return rc < 0 ? rc : rc != nr_pages; } static int decrease_reservation(unsigned long nr_pages) { - unsigned long pfn, i, flags; + unsigned long pfn, i; struct page *page; int need_sleep = 0; int ret; @@ -296,8 +292,6 @@ static int decrease_reservation(unsigned long nr_pages) kmap_flush_unused(); flush_tlb_all(); - spin_lock_irqsave(&xen_reservation_lock, flags); - /* No more mappings: invalidate P2M and add to balloon. */ for (i = 0; i < nr_pages; i++) { pfn = mfn_to_pfn(frame_list[i]); @@ -312,8 +306,6 @@ static int decrease_reservation(unsigned long nr_pages) balloon_stats.current_pages -= nr_pages; - spin_unlock_irqrestore(&xen_reservation_lock, flags); - return need_sleep; } -- cgit v0.10.2 From 66946f676776a6ef333db1cf7453ecf8a66c90df Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 14 Sep 2010 10:32:32 -0700 Subject: xen/balloon: make sure we only include remaining extra ram If the user specifies mem= on the kernel command line, some or all of the extra memory E820 region may be clipped away, so make sure we don't try to add more extra memory than exists in E820. Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 77b5dc3..2b17ad5 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -391,7 +392,7 @@ static struct notifier_block xenstore_notifier; static int __init balloon_init(void) { - unsigned long pfn; + unsigned long pfn, extra_pfn_end; struct page *page; if (!xen_pv_domain()) @@ -412,8 +413,10 @@ static int __init balloon_init(void) register_balloon(&balloon_sysdev); /* Initialise the balloon with excess memory space. */ + extra_pfn_end = min(e820_end_of_ram_pfn(), + (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size)); for (pfn = PFN_UP(xen_extra_mem_start); - pfn < PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size); + pfn < extra_pfn_end; pfn++) { page = pfn_to_page(pfn); /* totalram_pages doesn't include the boot-time -- cgit v0.10.2 From d2a817130cdc142f1c80a8e60eca824a321926af Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 19 Nov 2010 23:27:06 -0800 Subject: xen: re-enable boot-time ballooning Now that the balloon driver doesn't stumble over non-RAM pages, we can enable the extra space for ballooning. Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 769c4b0..630fb53 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -248,8 +248,7 @@ char * __init xen_memory_setup(void) else extra_pages = 0; - if (!xen_initial_domain()) - xen_add_extra_mem(extra_pages); + xen_add_extra_mem(extra_pages); return "Xen"; } -- cgit v0.10.2 From ec35a69c467026437519bafcf325a7362e422db9 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 16 Nov 2010 12:09:59 -0500 Subject: xen: set IO permission early (before early_cpu_init()) This patch is based off "xen dom0: Set up basic IO permissions for dom0." by Juan Quintela . On AMD machines when we boot the kernel as Domain 0 we get this nasty: mapping kernel into physical memory Xen: setup ISA identity maps about to get started... (XEN) traps.c:475:d0 Unhandled general protection fault fault/trap [#13] on VCPU 0 [ec=0000] (XEN) domain_crash_sync called from entry.S (XEN) Domain 0 (vcpu#0) crashed on cpu#0: (XEN) ----[ Xen-4.1-101116 x86_64 debug=y Not tainted ]---- (XEN) CPU: 0 (XEN) RIP: e033:[] (XEN) RFLAGS: 0000000000000282 EM: 1 CONTEXT: pv guest (XEN) rax: 000000008000c068 rbx: ffffffff8186c680 rcx: 0000000000000068 (XEN) rdx: 0000000000000cf8 rsi: 000000000000c000 rdi: 0000000000000000 (XEN) rbp: ffffffff81801e98 rsp: ffffffff81801e50 r8: ffffffff81801eac (XEN) r9: ffffffff81801ea8 r10: ffffffff81801eb4 r11: 00000000ffffffff (XEN) r12: ffffffff8186c694 r13: ffffffff81801f90 r14: ffffffffffffffff (XEN) r15: 0000000000000000 cr0: 000000008005003b cr4: 00000000000006f0 (XEN) cr3: 0000000221803000 cr2: 0000000000000000 (XEN) ds: 0000 es: 0000 fs: 0000 gs: 0000 ss: e02b cs: e033 (XEN) Guest stack trace from rsp=ffffffff81801e50: RIP points to read_pci_config() function. The issue is that we don't set IO permissions for the Linux kernel early enough. The call sequence used to be: xen_start_kernel() x86_init.oem.arch_setup = xen_setup_arch; setup_arch: - early_cpu_init - early_init_amd - read_pci_config - x86_init.oem.arch_setup [ xen_arch_setup ] - set IO permissions. We need to set the IO permissions earlier on, which this patch does. Acked-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bd35549..7250bef 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1095,6 +1095,8 @@ static void __init xen_setup_stackprotector(void) /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { + struct physdev_set_iopl set_iopl; + int rc; pgd_t *pgd; if (!xen_start_info) @@ -1209,10 +1211,18 @@ asmlinkage void __init xen_start_kernel(void) #else pv_info.kernel_rpl = 0; #endif - /* set the limit of our address space */ xen_reserve_top(); + /* We used to do this in xen_arch_setup, but that is too late on AMD + * were early_cpu_init (run before ->arch_setup()) calls early_amd_init + * which pokes 0xcf8 port. + */ + set_iopl.iopl = 1; + rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); + if (rc != 0) + xen_raw_printk("physdev_op failed %d\n", rc); + #ifdef CONFIG_X86_32 /* set up basic CPUID stuff */ cpu_detect(&new_cpu_data); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 630fb53..38fdffa 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -336,9 +336,6 @@ void __cpuinit xen_enable_syscall(void) void __init xen_arch_setup(void) { - struct physdev_set_iopl set_iopl; - int rc; - xen_panic_handler_init(); HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); @@ -355,11 +352,6 @@ void __init xen_arch_setup(void) xen_enable_sysenter(); xen_enable_syscall(); - set_iopl.iopl = 1; - rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); - if (rc != 0) - printk(KERN_INFO "physdev_op failed %d\n", rc); - #ifdef CONFIG_ACPI if (!(xen_start_info->flags & SIF_INITDOMAIN)) { printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); -- cgit v0.10.2 From 12334715720b012180579f57650879d0fbb11a84 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 19 Nov 2010 11:27:09 -0500 Subject: xen/events: Use PIRQ instead of GSI value when unmapping MSI/MSI-X irqs. When we allocate a vector for MSI/MSI-X we save away the PIRQ, and the vector value. When we unmap (de-allocate) the MSI/MSI-X vector(s) we need to provide the PIRQ and the vector value. What we did instead was to provide the GSI (which was zero) and the vector value, and we got these unhappy error messages: (XEN) irq.c:1575: dom0: pirq 0 not mapped [ 7.733415] unmap irq failed -22 This patches fixes this and we use the PIRQ value instead of the GSI value. CC: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/events.c b/drivers/xen/events.c index d6d4f76..2811bb9 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -755,7 +755,7 @@ int xen_destroy_irq(int irq) goto out; if (xen_initial_domain()) { - unmap_irq.pirq = info->u.pirq.gsi; + unmap_irq.pirq = info->u.pirq.pirq; unmap_irq.domid = DOMID_SELF; rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq); if (rc) { -- cgit v0.10.2