From 23ace955c22cb9bdf703e4bdc9bf7379166113cd Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Mon, 9 Feb 2009 12:05:46 -0800 Subject: xen: Don't disable the I/O space If a guest domain wants to access PCI devices through the frontend driver (coming later in the patch series), it will need access to the I/O space. [ Impact: Allow for domU IO access, preparing for pci passthrough ] Signed-off-by: Alex Nixon Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 328b003..c413132 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -260,7 +260,5 @@ void __init xen_arch_setup(void) pm_idle = xen_idle; - paravirt_disable_iospace(); - fiddle_vdso(); } -- cgit v0.10.2 From d8e0420603cf1ce9cb459c00ea0b7337de41b968 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:46 -0800 Subject: xen: define BIOVEC_PHYS_MERGEABLE() Impact: allow Xen control of bio merging When running in Xen domain with device access, we need to make sure the block subsystem doesn't merge requests across pages which aren't machine physically contiguous. To do this, we define our own BIOVEC_PHYS_MERGEABLE. When CONFIG_XEN isn't enabled, or we're not running in a Xen domain, this has identical behaviour to the normal implementation. When running under Xen, we also make sure the underlying machine pages are the same or adjacent. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 30a3e97..0ad29d4 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -41,6 +41,8 @@ #include #include +#include + #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ { type ret; asm volatile("mov" size " %1,%0":reg (ret) \ @@ -349,6 +351,17 @@ extern void __iomem *early_memremap(resource_size_t phys_addr, extern void early_iounmap(void __iomem *addr, unsigned long size); extern void fixup_early_ioremap(void); +#ifdef CONFIG_XEN +struct bio_vec; + +extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, + const struct bio_vec *vec2); + +#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ + (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ + (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) +#endif /* CONFIG_XEN */ + #define IO_SPACE_LIMIT 0xffff #endif /* _ASM_X86_IO_H */ diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index fcaf838..b47f5da 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,4 +1,4 @@ -obj-y += grant-table.o features.o events.o manage.o +obj-y += grant-table.o features.o events.o manage.o biomerge.o obj-y += xenbus/ nostackp := $(call cc-option, -fno-stack-protector) diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c new file mode 100644 index 0000000..ba6eda4 --- /dev/null +++ b/drivers/xen/biomerge.c @@ -0,0 +1,13 @@ +#include +#include +#include + +bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, + const struct bio_vec *vec2) +{ + unsigned long mfn1 = pfn_to_mfn(page_to_pfn(vec1->bv_page)); + unsigned long mfn2 = pfn_to_mfn(page_to_pfn(vec2->bv_page)); + + return __BIOVEC_PHYS_MERGEABLE(vec1, vec2) && + ((mfn1 == mfn2) || ((mfn1+1) == mfn2)); +} -- cgit v0.10.2 From d46a78b05c0e37f76ddf4a7a67bf0b6c68bada55 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 1 Oct 2010 12:20:09 -0400 Subject: xen: implement pirq type event channels A privileged PV Xen domain can get direct access to hardware. In order for this to be useful, it must be able to get hardware interrupts. Being a PV Xen domain, all interrupts are delivered as event channels. PIRQ event channels are bound to a pirq number and an interrupt vector. When a IO APIC raises a hardware interrupt on that vector, it is delivered as an event channel, which we can deliver to the appropriate device driver(s). This patch simply implements the infrastructure for dealing with pirq event channels. [ Impact: integrate hardware interrupts into Xen's event scheme ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 7d24b0d..bc69a9d 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -16,7 +16,7 @@ * (typically dom0). * 2. VIRQs, typically used for timers. These are per-cpu events. * 3. IPIs. - * 4. Hardware interrupts. Not supported at present. + * 4. PIRQs - Hardware interrupts. * * Jeremy Fitzhardinge , XenSource Inc, 2007 */ @@ -46,6 +46,9 @@ #include #include +/* Leave low irqs free for identity mapping */ +#define LEGACY_IRQS 16 + /* * This lock protects updates to the following mapping and reference-count * arrays. The lock does not need to be acquired to read the mapping tables. @@ -89,10 +92,12 @@ struct irq_info enum ipi_vector ipi; struct { unsigned short gsi; - unsigned short vector; + unsigned char vector; + unsigned char flags; } pirq; } u; }; +#define PIRQ_NEEDS_EOI (1 << 0) static struct irq_info irq_info[NR_IRQS]; @@ -113,6 +118,7 @@ static inline unsigned long *cpu_evtchn_mask(int cpu) static struct irq_chip xen_dynamic_chip; static struct irq_chip xen_percpu_chip; +static struct irq_chip xen_pirq_chip; /* Constructor for packed IRQ information. */ static struct irq_info mk_unbound_info(void) @@ -225,6 +231,15 @@ static unsigned int cpu_from_evtchn(unsigned int evtchn) return ret; } +static bool pirq_needs_eoi(unsigned irq) +{ + struct irq_info *info = info_for_irq(irq); + + BUG_ON(info->type != IRQT_PIRQ); + + return info->u.pirq.flags & PIRQ_NEEDS_EOI; +} + static inline unsigned long active_evtchns(unsigned int cpu, struct shared_info *sh, unsigned int idx) @@ -365,6 +380,210 @@ static int find_unbound_irq(void) return irq; } +static bool identity_mapped_irq(unsigned irq) +{ + /* only identity map legacy irqs */ + return irq < LEGACY_IRQS; +} + +static void pirq_unmask_notify(int irq) +{ + struct physdev_eoi eoi = { .irq = irq }; + + if (unlikely(pirq_needs_eoi(irq))) { + int rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); + WARN_ON(rc); + } +} + +static void pirq_query_unmask(int irq) +{ + struct physdev_irq_status_query irq_status; + struct irq_info *info = info_for_irq(irq); + + BUG_ON(info->type != IRQT_PIRQ); + + irq_status.irq = irq; + if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) + irq_status.flags = 0; + + info->u.pirq.flags &= ~PIRQ_NEEDS_EOI; + if (irq_status.flags & XENIRQSTAT_needs_eoi) + info->u.pirq.flags |= PIRQ_NEEDS_EOI; +} + +static bool probing_irq(int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + return desc && desc->action == NULL; +} + +static unsigned int startup_pirq(unsigned int irq) +{ + struct evtchn_bind_pirq bind_pirq; + struct irq_info *info = info_for_irq(irq); + int evtchn = evtchn_from_irq(irq); + + BUG_ON(info->type != IRQT_PIRQ); + + if (VALID_EVTCHN(evtchn)) + goto out; + + bind_pirq.pirq = irq; + /* NB. We are happy to share unless we are probing. */ + bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) { + if (!probing_irq(irq)) + printk(KERN_INFO "Failed to obtain physical IRQ %d\n", + irq); + return 0; + } + evtchn = bind_pirq.port; + + pirq_query_unmask(irq); + + evtchn_to_irq[evtchn] = irq; + bind_evtchn_to_cpu(evtchn, 0); + info->evtchn = evtchn; + +out: + unmask_evtchn(evtchn); + pirq_unmask_notify(irq); + + return 0; +} + +static void shutdown_pirq(unsigned int irq) +{ + struct evtchn_close close; + struct irq_info *info = info_for_irq(irq); + int evtchn = evtchn_from_irq(irq); + + BUG_ON(info->type != IRQT_PIRQ); + + if (!VALID_EVTCHN(evtchn)) + return; + + mask_evtchn(evtchn); + + close.port = evtchn; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) + BUG(); + + bind_evtchn_to_cpu(evtchn, 0); + evtchn_to_irq[evtchn] = -1; + info->evtchn = 0; +} + +static void enable_pirq(unsigned int irq) +{ + startup_pirq(irq); +} + +static void disable_pirq(unsigned int irq) +{ +} + +static void ack_pirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + move_native_irq(irq); + + if (VALID_EVTCHN(evtchn)) { + mask_evtchn(evtchn); + clear_evtchn(evtchn); + } +} + +static void end_pirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + struct irq_desc *desc = irq_to_desc(irq); + + if (WARN_ON(!desc)) + return; + + if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) == + (IRQ_DISABLED|IRQ_PENDING)) { + shutdown_pirq(irq); + } else if (VALID_EVTCHN(evtchn)) { + unmask_evtchn(evtchn); + pirq_unmask_notify(irq); + } +} + +static int find_irq_by_gsi(unsigned gsi) +{ + int irq; + + for (irq = 0; irq < NR_IRQS; irq++) { + struct irq_info *info = info_for_irq(irq); + + if (info == NULL || info->type != IRQT_PIRQ) + continue; + + if (gsi_from_irq(irq) == gsi) + return irq; + } + + return -1; +} + +/* + * Allocate a physical irq, along with a vector. We don't assign an + * event channel until the irq actually started up. Return an + * existing irq if we've already got one for the gsi. + */ +int xen_allocate_pirq(unsigned gsi) +{ + int irq; + struct physdev_irq irq_op; + + spin_lock(&irq_mapping_update_lock); + + irq = find_irq_by_gsi(gsi); + if (irq != -1) { + printk(KERN_INFO "xen_allocate_pirq: returning irq %d for gsi %u\n", + irq, gsi); + goto out; /* XXX need refcount? */ + } + + if (identity_mapped_irq(gsi)) { + irq = gsi; + dynamic_irq_init(irq); + } else + irq = find_unbound_irq(); + + set_irq_chip_and_handler_name(irq, &xen_pirq_chip, + handle_level_irq, "pirq"); + + irq_op.irq = irq; + if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { + dynamic_irq_cleanup(irq); + irq = -ENOSPC; + goto out; + } + + irq_info[irq] = mk_pirq_info(0, gsi, irq_op.vector); + +out: + spin_unlock(&irq_mapping_update_lock); + + return irq; +} + +int xen_vector_from_irq(unsigned irq) +{ + return vector_from_irq(irq); +} + +int xen_gsi_from_irq(unsigned irq) +{ + return gsi_from_irq(irq); +} + int bind_evtchn_to_irq(unsigned int evtchn) { int irq; @@ -964,6 +1183,26 @@ static struct irq_chip xen_dynamic_chip __read_mostly = { .retrigger = retrigger_dynirq, }; +static struct irq_chip xen_pirq_chip __read_mostly = { + .name = "xen-pirq", + + .startup = startup_pirq, + .shutdown = shutdown_pirq, + + .enable = enable_pirq, + .unmask = enable_pirq, + + .disable = disable_pirq, + .mask = disable_pirq, + + .ack = ack_pirq, + .end = end_pirq, + + .set_affinity = set_affinity_irq, + + .retrigger = retrigger_dynirq, +}; + static struct irq_chip xen_percpu_chip __read_mostly = { .name = "xen-percpu", diff --git a/include/xen/events.h b/include/xen/events.h index a15d932..8f62320 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -63,4 +63,15 @@ int xen_set_callback_via(uint64_t via); void xen_evtchn_do_upcall(struct pt_regs *regs); void xen_hvm_evtchn_do_upcall(void); +/* Allocate an irq for a physical interrupt, given a gsi. "Legacy" + * GSIs are identity mapped; others are dynamically allocated as + * usual. */ +int xen_allocate_pirq(unsigned gsi); + +/* Return vector allocated to pirq */ +int xen_vector_from_irq(unsigned pirq); + +/* Return gsi allocated to pirq */ +int xen_gsi_from_irq(unsigned pirq); + #endif /* _XEN_EVENTS_H */ -- cgit v0.10.2 From 7b586d71858091f0958e5808b7e3d5390c2ae47d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 12 Feb 2009 17:22:49 -0800 Subject: x86/io_apic: add get_nr_irqs_gsi() Impact: new interface to get max GSI Add get_nr_irqs_gsi() to return nr_irqs_gsi. Xen will use this to determine how many irqs it needs to reserve for hardware irqs. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: "H. Peter Anvin" Acked-by: Thomas Gleixner Cc: x86@kernel.org Cc: Jesse Barnes diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index c8be456..a6b28d0 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -169,6 +169,7 @@ extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); extern void probe_nr_irqs_gsi(void); +extern int get_nr_irqs_gsi(void); extern void setup_ioapic_ids_from_mpc(void); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 20e47e0..44bb914 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3649,6 +3649,11 @@ void __init probe_nr_irqs_gsi(void) printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); } +int get_nr_irqs_gsi(void) +{ + return nr_irqs_gsi; +} + #ifdef CONFIG_SPARSE_IRQ int __init arch_probe_nr_irqs(void) { -- cgit v0.10.2 From 0794bfc74365d0de4b1d4920cb71031850551cbd Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 18 Oct 2010 10:41:08 -0400 Subject: xen: identity map gsi->irqs Impact: preserve compat with native Reserve the lower irq range for use for hardware interrupts so we can identity-map them. [v2: Rebased on top tip/irq/core] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/events.c b/drivers/xen/events.c index bc69a9d..1bb51e4 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -46,9 +47,6 @@ #include #include -/* Leave low irqs free for identity mapping */ -#define LEGACY_IRQS 16 - /* * This lock protects updates to the following mapping and reference-count * arrays. The lock does not need to be acquired to read the mapping tables. @@ -351,6 +349,17 @@ static void unmask_evtchn(int port) put_cpu(); } +static int get_nr_hw_irqs(void) +{ + int ret = 1; + +#ifdef CONFIG_X86_IO_APIC + ret = get_nr_irqs_gsi(); +#endif + + return ret; +} + static int find_unbound_irq(void) { struct irq_data *data; @@ -382,8 +391,8 @@ static int find_unbound_irq(void) static bool identity_mapped_irq(unsigned irq) { - /* only identity map legacy irqs */ - return irq < LEGACY_IRQS; + /* identity map all the hardware irqs */ + return irq < get_nr_hw_irqs(); } static void pirq_unmask_notify(int irq) @@ -552,6 +561,7 @@ int xen_allocate_pirq(unsigned gsi) if (identity_mapped_irq(gsi)) { irq = gsi; + irq_to_desc_alloc_node(irq, 0); dynamic_irq_init(irq); } else irq = find_unbound_irq(); -- cgit v0.10.2 From b21ddbf50386d10cdd60d8f8e744cff0496d2552 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 7 Jun 2010 16:28:49 -0400 Subject: xen: dynamically allocate irq & event structures Dynamically allocate the irq_info and evtchn_to_irq arrays, so that 1) the irq_info array scales to the actual number of possible irqs, and 2) we don't needlessly increase the static size of the kernel when we aren't running under Xen. Derived on patch from Mike Travis . [Impact: reduce memory usage ] [v2: Conflict in drivers/xen/events.c: Replaced alloc_bootmen with kcalloc ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 1bb51e4..19a9329 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -97,11 +98,9 @@ struct irq_info }; #define PIRQ_NEEDS_EOI (1 << 0) -static struct irq_info irq_info[NR_IRQS]; +static struct irq_info *irq_info; -static int evtchn_to_irq[NR_EVENT_CHANNELS] = { - [0 ... NR_EVENT_CHANNELS-1] = -1 -}; +static int *evtchn_to_irq; struct cpu_evtchn_s { unsigned long bits[NR_EVENT_CHANNELS/BITS_PER_LONG]; }; @@ -527,7 +526,7 @@ static int find_irq_by_gsi(unsigned gsi) { int irq; - for (irq = 0; irq < NR_IRQS; irq++) { + for (irq = 0; irq < nr_irqs; irq++) { struct irq_info *info = info_for_irq(irq); if (info == NULL || info->type != IRQT_PIRQ) @@ -1267,7 +1266,12 @@ void __init xen_init_IRQ(void) cpu_evtchn_mask_p = kcalloc(nr_cpu_ids, sizeof(struct cpu_evtchn_s), GFP_KERNEL); - BUG_ON(cpu_evtchn_mask_p == NULL); + irq_info = kcalloc(nr_irqs, sizeof(*irq_info), GFP_KERNEL); + + evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), + GFP_KERNEL); + for (i = 0; i < NR_EVENT_CHANNELS; i++) + evtchn_to_irq[i] = -1; init_evtchn_cpu_bindings(); -- cgit v0.10.2 From 1a60d05f40882303dad13f8f0e077e2e49ea8996 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 4 Oct 2010 13:42:27 -0400 Subject: xen: set pirq name to something useful. Impact: cleanup Make pirq show useful information in /proc/interrupts [v2: Removed the parts for arch/x86/xen/pci.c ] Signed-off-by: Gerd Hoffmann Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 19a9329..0fcfb4a 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -544,7 +544,7 @@ static int find_irq_by_gsi(unsigned gsi) * event channel until the irq actually started up. Return an * existing irq if we've already got one for the gsi. */ -int xen_allocate_pirq(unsigned gsi) +int xen_allocate_pirq(unsigned gsi, char *name) { int irq; struct physdev_irq irq_op; @@ -566,7 +566,7 @@ int xen_allocate_pirq(unsigned gsi) irq = find_unbound_irq(); set_irq_chip_and_handler_name(irq, &xen_pirq_chip, - handle_level_irq, "pirq"); + handle_level_irq, name); irq_op.irq = irq; if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { diff --git a/include/xen/events.h b/include/xen/events.h index 8f62320..8227da8 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -66,7 +66,7 @@ void xen_hvm_evtchn_do_upcall(void); /* Allocate an irq for a physical interrupt, given a gsi. "Legacy" * GSIs are identity mapped; others are dynamically allocated as * usual. */ -int xen_allocate_pirq(unsigned gsi); +int xen_allocate_pirq(unsigned gsi, char *name); /* Return vector allocated to pirq */ int xen_vector_from_irq(unsigned pirq); -- cgit v0.10.2 From 3b32f574a032bb5c93957317bd4ce5c3397d5a7b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 13 Aug 2009 12:50:37 -0700 Subject: xen: statically initialize cpu_evtchn_mask_p Sometimes cpu_evtchn_mask_p can get used early, before it has been allocated. Statically initialize it with an initdata version to catch any early references. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 0fcfb4a..1e39908 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -104,7 +104,12 @@ static int *evtchn_to_irq; struct cpu_evtchn_s { unsigned long bits[NR_EVENT_CHANNELS/BITS_PER_LONG]; }; -static struct cpu_evtchn_s *cpu_evtchn_mask_p; + +static __initdata struct cpu_evtchn_s init_evtchn_mask = { + .bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul, +}; +static struct cpu_evtchn_s *cpu_evtchn_mask_p = &init_evtchn_mask; + static inline unsigned long *cpu_evtchn_mask(int cpu) { return cpu_evtchn_mask_p[cpu].bits; -- cgit v0.10.2 From 3a69e9165a271b026c7149886b96ab0cc2e9a36b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 18 Oct 2010 10:49:10 -0400 Subject: xen: Find an unbound irq number in reverse order (high to low). In earlier Xen Linux kernels, the IRQ mapping was a straight 1:1 and the find_unbound_irq started looking around 256 for open IRQs and up. IRQs from 0 to 255 were reserved for PCI devices. Previous to this patch, the 'find_unbound_irq' started looking at get_nr_hw_irqs() number. For privileged domain where the ACPI information is available that returns the upper-bound of what the GSIs. For non-privileged PV domains, where ACPI is no-existent the get_nr_hw_irqs() reports the IRQ_LEGACY (16). With PCI passthrough enabled, and with PCI cards that have IRQs pinned to a higher number than 16 we collide with previously allocated IRQs. Specifically the PCI IRQs collide with the IPI's for Xen functions (as they are allocated earlier). For example: 00:00.11 USB Controller: ATI Technologies Inc SB700 USB OHCI1 Controller (prog-if 10 [OHCI]) ... Interrupt: pin A routed to IRQ 18 [root@localhost ~]# cat /proc/interrupts | head CPU0 CPU1 CPU2 16: 38186 0 0 xen-dyn-virq timer0 17: 149 0 0 xen-dyn-ipi spinlock0 18: 962 0 0 xen-dyn-ipi resched0 and when the USB controller is loaded, the kernel reports: IRQ handler type mismatch for IRQ 18 current handler: resched0 One way to fix this is to reverse the logic when looking for un-used IRQ numbers and start with the highest available number. With that, we would get: CPU0 CPU1 CPU2 ... snip .. 292: 35 0 0 xen-dyn-ipi callfunc0 293: 3992 0 0 xen-dyn-ipi resched0 294: 224 0 0 xen-dyn-ipi spinlock0 295: 57183 0 0 xen-dyn-virq timer0 NMI: 0 0 0 Non-maskable interrupts .. snip .. And interrupts for PCI cards are now accessible. This patch also includes the fix, found by Ian Campbell, titled "xen: fix off-by-one error in find_unbound_irq." [v2: Added an explanation in the code] [v3: Rebased on top of tip/irq/core] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 1e39908..bab5ac1 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -368,8 +368,13 @@ static int find_unbound_irq(void) { struct irq_data *data; int irq, res; + int start = get_nr_hw_irqs(); - for (irq = 0; irq < nr_irqs; irq++) { + if (start == nr_irqs) + goto no_irqs; + + /* nr_irqs is a magic value. Must not use it.*/ + for (irq = nr_irqs-1; irq > start; irq--) { data = irq_get_irq_data(irq); /* only 0->15 have init'd desc; handle irq > 16 */ if (!data) @@ -382,8 +387,8 @@ static int find_unbound_irq(void) return irq; } - if (irq == nr_irqs) - panic("No available IRQ to bind to: increase nr_irqs!\n"); + if (irq == start) + goto no_irqs; res = irq_alloc_desc_at(irq, 0); @@ -391,6 +396,9 @@ static int find_unbound_irq(void) return -1; return irq; + +no_irqs: + panic("No available IRQ to bind to: increase nr_irqs!\n"); } static bool identity_mapped_irq(unsigned irq) @@ -544,8 +552,15 @@ static int find_irq_by_gsi(unsigned gsi) return -1; } -/* - * Allocate a physical irq, along with a vector. We don't assign an +/* xen_allocate_irq might allocate irqs from the top down, as a + * consequence don't assume that the irq number returned has a low value + * or can be used as a pirq number unless you know otherwise. + * + * One notable exception is when xen_allocate_irq is called passing an + * hardware gsi as argument, in that case the irq number returned + * matches the gsi number passed as first argument. + + * Note: We don't assign an * event channel until the irq actually started up. Return an * existing irq if we've already got one for the gsi. */ -- cgit v0.10.2 From d9a8814f27080cec6126fca3ef0c210d9f56181e Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 5 Nov 2009 16:33:09 -0500 Subject: xen: Provide a variant of xen_poll_irq with timeout. The 'xen_poll_irq_timeout' provides a method to pass in the poll timeout for IRQs if requested. We also export those two poll functions as Xen PCI fronted uses them. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/events.c b/drivers/xen/events.c index bab5ac1..4e0f8685 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -1139,7 +1139,7 @@ void xen_clear_irq_pending(int irq) if (VALID_EVTCHN(evtchn)) clear_evtchn(evtchn); } - +EXPORT_SYMBOL(xen_clear_irq_pending); void xen_set_irq_pending(int irq) { int evtchn = evtchn_from_irq(irq); @@ -1159,9 +1159,9 @@ bool xen_test_irq_pending(int irq) return ret; } -/* Poll waiting for an irq to become pending. In the usual case, the - irq will be disabled so it won't deliver an interrupt. */ -void xen_poll_irq(int irq) +/* Poll waiting for an irq to become pending with timeout. In the usual case, + * the irq will be disabled so it won't deliver an interrupt. */ +void xen_poll_irq_timeout(int irq, u64 timeout) { evtchn_port_t evtchn = evtchn_from_irq(irq); @@ -1169,13 +1169,20 @@ void xen_poll_irq(int irq) struct sched_poll poll; poll.nr_ports = 1; - poll.timeout = 0; + poll.timeout = timeout; set_xen_guest_handle(poll.ports, &evtchn); if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0) BUG(); } } +EXPORT_SYMBOL(xen_poll_irq_timeout); +/* Poll waiting for an irq to become pending. In the usual case, the + * irq will be disabled so it won't deliver an interrupt. */ +void xen_poll_irq(int irq) +{ + xen_poll_irq_timeout(irq, 0 /* no timeout */); +} void xen_irq_resume(void) { diff --git a/include/xen/events.h b/include/xen/events.h index 8227da8..2532f8b 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -53,6 +53,10 @@ bool xen_test_irq_pending(int irq); irq will be disabled so it won't deliver an interrupt. */ void xen_poll_irq(int irq); +/* Poll waiting for an irq to become pending with a timeout. In the usual case, + * the irq will be disabled so it won't deliver an interrupt. */ +void xen_poll_irq_timeout(int irq, u64 timeout); + /* Determine the IRQ which is bound to an event channel */ unsigned irq_from_evtchn(unsigned int evtchn); -- cgit v0.10.2 From 15ebbb82bac700db3c91e662fb70cb3559e9d930 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 4 Oct 2010 13:43:27 -0400 Subject: xen: fix shared irq device passthrough In driver/xen/events.c, whether bind_pirq is shareable or not is determined by desc->action is NULL or not. But in __setup_irq, startup(irq) is invoked before desc->action is assigned with new action. So desc->action in startup_irq is always NULL, and bind_pirq is always not shareable. This results in pt_irq_create_bind failure when passthrough a device which shares irq to other devices. This patch doesn't use probing_irq to determine if pirq is shareable or not, instead set shareable flag in irq_info according to trigger mode in xen_allocate_pirq. Set level triggered interrupts shareable. Thus use this flag to set bind_pirq flag accordingly. [v2: arch/x86/xen/pci.c no more, so file skipped] Signed-off-by: Weidong Han Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 4e0f8685..cd50409 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -97,6 +97,7 @@ struct irq_info } u; }; #define PIRQ_NEEDS_EOI (1 << 0) +#define PIRQ_SHAREABLE (1 << 1) static struct irq_info *irq_info; @@ -445,6 +446,7 @@ static unsigned int startup_pirq(unsigned int irq) struct evtchn_bind_pirq bind_pirq; struct irq_info *info = info_for_irq(irq); int evtchn = evtchn_from_irq(irq); + int rc; BUG_ON(info->type != IRQT_PIRQ); @@ -453,8 +455,10 @@ static unsigned int startup_pirq(unsigned int irq) bind_pirq.pirq = irq; /* NB. We are happy to share unless we are probing. */ - bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE; - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) { + bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ? + BIND_PIRQ__WILL_SHARE : 0; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq); + if (rc != 0) { if (!probing_irq(irq)) printk(KERN_INFO "Failed to obtain physical IRQ %d\n", irq); @@ -564,7 +568,7 @@ static int find_irq_by_gsi(unsigned gsi) * event channel until the irq actually started up. Return an * existing irq if we've already got one for the gsi. */ -int xen_allocate_pirq(unsigned gsi, char *name) +int xen_allocate_pirq(unsigned gsi, int shareable, char *name) { int irq; struct physdev_irq irq_op; @@ -596,6 +600,7 @@ int xen_allocate_pirq(unsigned gsi, char *name) } irq_info[irq] = mk_pirq_info(0, gsi, irq_op.vector); + irq_info[irq].u.pirq.flags |= shareable ? PIRQ_SHAREABLE : 0; out: spin_unlock(&irq_mapping_update_lock); diff --git a/include/xen/events.h b/include/xen/events.h index 2532f8b..d7a4ca7 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -70,7 +70,7 @@ void xen_hvm_evtchn_do_upcall(void); /* Allocate an irq for a physical interrupt, given a gsi. "Legacy" * GSIs are identity mapped; others are dynamically allocated as * usual. */ -int xen_allocate_pirq(unsigned gsi, char *name); +int xen_allocate_pirq(unsigned gsi, int shareable, char *name); /* Return vector allocated to pirq */ int xen_vector_from_irq(unsigned pirq); -- cgit v0.10.2 From 44de3395a4bb61341dfb7b3b7c94edfddeabae4b Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Thu, 18 Mar 2010 14:28:12 -0400 Subject: x86/PCI: Clean up pci_cache_line_size Separate out x86 cache_line_size initialisation code into its own function (so it can be shared by Xen later in this patch series) [ Impact: cleanup ] Signed-off-by: Alex Nixon Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: "H. Peter Anvin" Reviewed-by: Matthew Wilcox Reviewed-by: Jesse Barnes Cc: x86@kernel.org diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 49c7219..7045267 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -47,6 +47,7 @@ enum pci_bf_sort_state { extern unsigned int pcibios_max_latency; void pcibios_resource_survey(void); +void pcibios_set_cache_line_size(void); /* pci-pc.c */ diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index a0772af..f7c8a39 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -421,16 +421,10 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) return bus; } - -int __init pcibios_init(void) +void __init pcibios_set_cache_line_size(void) { struct cpuinfo_x86 *c = &boot_cpu_data; - if (!raw_pci_ops) { - printk(KERN_WARNING "PCI: System does not support PCI\n"); - return 0; - } - /* * Set PCI cacheline size to that of the CPU if the CPU has reported it. * (For older CPUs that don't support cpuid, we se it to 32 bytes @@ -445,7 +439,16 @@ int __init pcibios_init(void) pci_dfl_cache_line_size = 32 >> 2; printk(KERN_DEBUG "PCI: Unknown cacheline size. Setting to 32 bytes\n"); } +} + +int __init pcibios_init(void) +{ + if (!raw_pci_ops) { + printk(KERN_WARNING "PCI: System does not support PCI\n"); + return 0; + } + pcibios_set_cache_line_size(); pcibios_resource_survey(); if (pci_bf_sort >= pci_force_bf) -- cgit v0.10.2 From 5ee01f49c963d5e0b530344f86535ecb7f672064 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 18 Mar 2010 14:31:30 -0400 Subject: x86/PCI: make sure _PAGE_IOMAP it set on pci mappings When mapping pci space via /sys or /proc, make sure we're really doing a hardware mapping by setting _PAGE_IOMAP. [ Impact: bugfix; make PCI mappings map the right pages ] Signed-off-by: Jeremy Fitzhardinge Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: "H. Peter Anvin" Reviewed-by: Matthew Wilcox Acked-by: Jesse Barnes Cc: x86@kernel.org diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 5525309..8379c2c 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -311,6 +311,8 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, */ prot |= _PAGE_CACHE_UC_MINUS; + prot |= _PAGE_IOMAP; /* creating a mapping for IO */ + vma->vm_page_prot = __pgprot(prot); if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, -- cgit v0.10.2 From 7c94def89aa5091706e03b98047c074d7ac74af0 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 22 Dec 2009 14:49:45 -0500 Subject: x86/PCI: Export pci_walk_bus function. In preperation of modularizing Xen-pcifront the pci_walk_bus needs to be exported so that the xen-pcifront module can walk call the pci subsystem to walk the PCI devices and claim them. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk Acked-by: Jesse Barnes [http://marc.info/?l=linux-pci&m=126149958010298&w=2] diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 7f0af0e..69546e9 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -299,6 +299,7 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), } up_read(&pci_bus_sem); } +EXPORT_SYMBOL_GPL(pci_walk_bus); EXPORT_SYMBOL(pci_bus_alloc_resource); EXPORT_SYMBOL_GPL(pci_bus_add_device); -- cgit v0.10.2 From 1525bf0d8f059a38c6e79353583854e1981b2e67 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Oct 2010 16:05:35 -0400 Subject: msi: Introduce default_[teardown|setup]_msi_irqs with fallback. Introduce an override for the arch_[teardown|setup]_msi_irqs that can be utilized to fallback to the default arch_* code. If a platform wants to utilize the code paths defined in driver/pci/msi.c it has to define HAVE_DEFAULT_MSI_TEARDOWN_IRQS or HAVE_DEFAULT_MSI_SETUP_IRQS. Otherwise the old mechanism of over-ridding the arch_* works fine. Signed-off-by: Konrad Rzeszutek Wilk Cc: x86@kernel.org diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 5fcf5ae..7c24dce 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -35,7 +35,12 @@ int arch_msi_check_device(struct pci_dev *dev, int nvec, int type) #endif #ifndef arch_setup_msi_irqs -int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +# define arch_setup_msi_irqs default_setup_msi_irqs +# define HAVE_DEFAULT_MSI_SETUP_IRQS +#endif + +#ifdef HAVE_DEFAULT_MSI_SETUP_IRQS +int default_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { struct msi_desc *entry; int ret; @@ -60,7 +65,12 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) #endif #ifndef arch_teardown_msi_irqs -void arch_teardown_msi_irqs(struct pci_dev *dev) +# define arch_teardown_msi_irqs default_teardown_msi_irqs +# define HAVE_DEFAULT_MSI_TEARDOWN_IRQS +#endif + +#ifdef HAVE_DEFAULT_MSI_TEARDOWN_IRQS +void default_teardown_msi_irqs(struct pci_dev *dev) { struct msi_desc *entry; -- cgit v0.10.2 From 294ee6f89cfd629e276f632a6003a0fad7785dce Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 6 Oct 2010 16:12:28 -0400 Subject: x86: Introduce x86_msi_ops Introduce an x86 specific indirect mechanism to setup MSIs. The MSI setup functions become function pointers in an x86_msi_ops struct, that defaults to the implementation in io_apic.c and msi.c. [v2: Use HAVE_DEFAULT_* knobs] Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: x86@kernel.org Cc: Jesse Barnes diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index d395540..ca0437c 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -7,6 +7,7 @@ #include #include #include +#include #ifdef __KERNEL__ @@ -94,8 +95,36 @@ static inline void early_quirks(void) { } extern void pci_iommu_alloc(void); -/* MSI arch hook */ -#define arch_setup_msi_irqs arch_setup_msi_irqs +#ifdef CONFIG_PCI_MSI +/* MSI arch specific hooks */ +static inline int x86_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + return x86_msi.setup_msi_irqs(dev, nvec, type); +} + +static inline void x86_teardown_msi_irqs(struct pci_dev *dev) +{ + x86_msi.teardown_msi_irqs(dev); +} + +static inline void x86_teardown_msi_irq(unsigned int irq) +{ + x86_msi.teardown_msi_irq(irq); +} +#define arch_setup_msi_irqs x86_setup_msi_irqs +#define arch_teardown_msi_irqs x86_teardown_msi_irqs +#define arch_teardown_msi_irq x86_teardown_msi_irq +/* implemented in arch/x86/kernel/apic/io_apic. */ +int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); +void native_teardown_msi_irq(unsigned int irq); +/* default to the implementation in drivers/lib/msi.c */ +#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS +void default_teardown_msi_irqs(struct pci_dev *dev); +#else +#define native_setup_msi_irqs NULL +#define native_teardown_msi_irq NULL +#define default_teardown_msi_irqs NULL +#endif #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index baa579c..64642ad 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -154,9 +154,18 @@ struct x86_platform_ops { int (*i8042_detect)(void); }; +struct pci_dev; + +struct x86_msi_ops { + int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); + void (*teardown_msi_irq)(unsigned int irq); + void (*teardown_msi_irqs)(struct pci_dev *dev); +}; + extern struct x86_init_ops x86_init; extern struct x86_cpuinit_ops x86_cpuinit; extern struct x86_platform_ops x86_platform; +extern struct x86_msi_ops x86_msi; extern void x86_init_noop(void); extern void x86_init_uint_noop(unsigned int unused); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 44bb914..0885a41 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3330,7 +3330,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) return 0; } -int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { int node, ret, sub_handle, index = 0; unsigned int irq, irq_want; @@ -3388,7 +3388,7 @@ error: return ret; } -void arch_teardown_msi_irq(unsigned int irq) +void native_teardown_msi_irq(unsigned int irq) { destroy_irq(irq); } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index cd6da6b..ceb2911 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -6,10 +6,12 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -99,3 +101,8 @@ struct x86_platform_ops x86_platform = { }; EXPORT_SYMBOL_GPL(x86_platform); +struct x86_msi_ops x86_msi = { + .setup_msi_irqs = native_setup_msi_irqs, + .teardown_msi_irq = native_teardown_msi_irq, + .teardown_msi_irqs = default_teardown_msi_irqs, +}; -- cgit v0.10.2 From b5401a96b59475c1c878439caecb8c521bdfd4ad Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Thu, 18 Mar 2010 16:31:34 -0400 Subject: xen/x86/PCI: Add support for the Xen PCI subsystem The frontend stub lives in arch/x86/pci/xen.c, alongside other sub-arch PCI init code (e.g. olpc.c). It provides a mechanism for Xen PCI frontend to setup/destroy legacy interrupts, MSI/MSI-X, and PCI configuration operations. [ Impact: add core of Xen PCI support ] [ v2: Removed the IOMMU code and only focusing on PCI.] [ v3: removed usage of pci_scan_all_fns as that does not exist] [ v4: introduced pci_xen value to fix compile warnings] [ v5: squished fixes+features in one patch, changed Reviewed-by to Ccs] [ v7: added Acked-by] Signed-off-by: Alex Nixon Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Stefano Stabellini Acked-by: Jesse Barnes Cc: "H. Peter Anvin" Cc: Matthew Wilcox Cc: Qing He Cc: Thomas Gleixner Cc: x86@kernel.org diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8cc5108..74ea59d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1898,6 +1898,11 @@ config PCI_OLPC def_bool y depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY) +config PCI_XEN + def_bool y + depends on PCI && XEN + select SWIOTLB_XEN + config PCI_DOMAINS def_bool y depends on PCI diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h new file mode 100644 index 0000000..449c82f --- /dev/null +++ b/arch/x86/include/asm/xen/pci.h @@ -0,0 +1,53 @@ +#ifndef _ASM_X86_XEN_PCI_H +#define _ASM_X86_XEN_PCI_H + +#if defined(CONFIG_PCI_XEN) +extern int __init pci_xen_init(void); +#define pci_xen 1 +#else +#define pci_xen 0 +#define pci_xen_init (0) +#endif + +#if defined(CONFIG_PCI_MSI) +#if defined(CONFIG_PCI_XEN) +/* The drivers/pci/xen-pcifront.c sets this structure to + * its own functions. + */ +struct xen_pci_frontend_ops { + int (*enable_msi)(struct pci_dev *dev, int **vectors); + void (*disable_msi)(struct pci_dev *dev); + int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec); + void (*disable_msix)(struct pci_dev *dev); +}; + +extern struct xen_pci_frontend_ops *xen_pci_frontend; + +static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev, + int **vectors) +{ + if (xen_pci_frontend && xen_pci_frontend->enable_msi) + return xen_pci_frontend->enable_msi(dev, vectors); + return -ENODEV; +} +static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev) +{ + if (xen_pci_frontend && xen_pci_frontend->disable_msi) + xen_pci_frontend->disable_msi(dev); +} +static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev, + int **vectors, int nvec) +{ + if (xen_pci_frontend && xen_pci_frontend->enable_msix) + return xen_pci_frontend->enable_msix(dev, vectors, nvec); + return -ENODEV; +} +static inline void xen_pci_frontend_disable_msix(struct pci_dev *dev) +{ + if (xen_pci_frontend && xen_pci_frontend->disable_msix) + xen_pci_frontend->disable_msix(dev); +} +#endif /* CONFIG_PCI_XEN */ +#endif /* CONFIG_PCI_MSI */ + +#endif /* _ASM_X86_XEN_PCI_H */ diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index a0207a7..effd96e 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_PCI_BIOS) += pcbios.o obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o obj-$(CONFIG_PCI_DIRECT) += direct.o obj-$(CONFIG_PCI_OLPC) += olpc.o +obj-$(CONFIG_PCI_XEN) += xen.o obj-y += fixup.o obj-$(CONFIG_ACPI) += acpi.o diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c new file mode 100644 index 0000000..b19c873 --- /dev/null +++ b/arch/x86/pci/xen.c @@ -0,0 +1,147 @@ +/* + * Xen PCI Frontend Stub - puts some "dummy" functions in to the Linux + * x86 PCI core to support the Xen PCI Frontend + * + * Author: Ryan Wilson + */ +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include + +#if defined(CONFIG_PCI_MSI) +#include + +struct xen_pci_frontend_ops *xen_pci_frontend; +EXPORT_SYMBOL_GPL(xen_pci_frontend); + +/* + * For MSI interrupts we have to use drivers/xen/event.s functions to + * allocate an irq_desc and setup the right */ + + +static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int irq, ret, i; + struct msi_desc *msidesc; + int *v; + + v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL); + if (!v) + return -ENOMEM; + + if (!xen_initial_domain()) { + if (type == PCI_CAP_ID_MSIX) + ret = xen_pci_frontend_enable_msix(dev, &v, nvec); + else + ret = xen_pci_frontend_enable_msi(dev, &v); + if (ret) + goto error; + } + i = 0; + list_for_each_entry(msidesc, &dev->msi_list, list) { + irq = xen_allocate_pirq(v[i], 0, /* not sharable */ + (type == PCI_CAP_ID_MSIX) ? + "pcifront-msi-x" : "pcifront-msi"); + if (irq < 0) + return -1; + + ret = set_irq_msi(irq, msidesc); + if (ret) + goto error_while; + i++; + } + kfree(v); + return 0; + +error_while: + unbind_from_irqhandler(irq, NULL); +error: + if (ret == -ENODEV) + dev_err(&dev->dev, "Xen PCI frontend has not registered" \ + " MSI/MSI-X support!\n"); + + kfree(v); + return ret; +} + +static void xen_teardown_msi_irqs(struct pci_dev *dev) +{ + /* Only do this when were are in non-privileged mode.*/ + if (!xen_initial_domain()) { + struct msi_desc *msidesc; + + msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); + if (msidesc->msi_attrib.is_msix) + xen_pci_frontend_disable_msix(dev); + else + xen_pci_frontend_disable_msi(dev); + } + +} + +static void xen_teardown_msi_irq(unsigned int irq) +{ + xen_destroy_irq(irq); +} +#endif + +static int xen_pcifront_enable_irq(struct pci_dev *dev) +{ + int rc; + int share = 1; + + dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq); + + if (dev->irq < 0) + return -EINVAL; + + if (dev->irq < NR_IRQS_LEGACY) + share = 0; + + rc = xen_allocate_pirq(dev->irq, share, "pcifront"); + if (rc < 0) { + dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n", + dev->irq, rc); + return rc; + } + return 0; +} + +int __init pci_xen_init(void) +{ + if (!xen_pv_domain() || xen_initial_domain()) + return -ENODEV; + + printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n"); + + pcibios_set_cache_line_size(); + + pcibios_enable_irq = xen_pcifront_enable_irq; + pcibios_disable_irq = NULL; + +#ifdef CONFIG_ACPI + /* Keep ACPI out of the picture */ + acpi_noirq = 1; +#endif + +#ifdef CONFIG_ISAPNP + /* Stop isapnp from probing */ + isapnp_disable = 1; +#endif + +#ifdef CONFIG_PCI_MSI + x86_msi.setup_msi_irqs = xen_setup_msi_irqs; + x86_msi.teardown_msi_irq = xen_teardown_msi_irq; + x86_msi.teardown_msi_irqs = xen_teardown_msi_irqs; +#endif + return 0; +} diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 7d46c84..1ccfa1b 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -1220,6 +1221,8 @@ asmlinkage void __init xen_start_kernel(void) add_preferred_console("xenboot", 0, NULL); add_preferred_console("tty", 0, NULL); add_preferred_console("hvc", 0, NULL); + if (pci_xen) + x86_init.pci.arch_init = pci_xen_init; } else { /* Make sure ACS will be enabled */ pci_request_acs(); diff --git a/drivers/xen/events.c b/drivers/xen/events.c index cd50409..7016a73 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -582,7 +582,9 @@ int xen_allocate_pirq(unsigned gsi, int shareable, char *name) goto out; /* XXX need refcount? */ } - if (identity_mapped_irq(gsi)) { + /* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore + * we are using the !xen_initial_domain() to drop in the function.*/ + if (identity_mapped_irq(gsi) || !xen_initial_domain()) { irq = gsi; irq_to_desc_alloc_node(irq, 0); dynamic_irq_init(irq); @@ -593,7 +595,13 @@ int xen_allocate_pirq(unsigned gsi, int shareable, char *name) handle_level_irq, name); irq_op.irq = irq; - if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { + irq_op.vector = 0; + + /* Only the privileged domain can do this. For non-priv, the pcifront + * driver provides a PCI bus that does the call to do exactly + * this in the priv domain. */ + if (xen_initial_domain() && + HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { dynamic_irq_cleanup(irq); irq = -ENOSPC; goto out; @@ -608,6 +616,26 @@ out: return irq; } +int xen_destroy_irq(int irq) +{ + struct irq_desc *desc; + int rc = -ENOENT; + + spin_lock(&irq_mapping_update_lock); + + desc = irq_to_desc(irq); + if (!desc) + goto out; + + irq_info[irq] = mk_unbound_info(); + + dynamic_irq_cleanup(irq); + +out: + spin_unlock(&irq_mapping_update_lock); + return rc; +} + int xen_vector_from_irq(unsigned irq) { return vector_from_irq(irq); diff --git a/include/xen/events.h b/include/xen/events.h index d7a4ca7..c1717ca 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -72,6 +72,9 @@ void xen_hvm_evtchn_do_upcall(void); * usual. */ int xen_allocate_pirq(unsigned gsi, int shareable, char *name); +/* De-allocates the above mentioned physical interrupt. */ +int xen_destroy_irq(int irq); + /* Return vector allocated to pirq */ int xen_vector_from_irq(unsigned pirq); -- cgit v0.10.2 From 89afb6e46a0f72e0e5c51ef44aa900b74681664b Mon Sep 17 00:00:00 2001 From: Yosuke Iwamatsu Date: Tue, 13 Oct 2009 17:22:27 -0400 Subject: xenbus: Xen paravirtualised PCI hotplug support. The Xen PCI front driver adds two new states that are utilizez for PCI hotplug support. This is a patch pulled from the linux-2.6-xen-sparse tree. Signed-off-by: Noboru Iwamatsu Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Yosuke Iwamatsu diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 7e49527..cdacf92 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -50,6 +50,8 @@ const char *xenbus_strstate(enum xenbus_state state) [ XenbusStateConnected ] = "Connected", [ XenbusStateClosing ] = "Closing", [ XenbusStateClosed ] = "Closed", + [XenbusStateReconfiguring] = "Reconfiguring", + [XenbusStateReconfigured] = "Reconfigured", }; return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID"; } diff --git a/include/xen/interface/io/xenbus.h b/include/xen/interface/io/xenbus.h index 46508c7..9fda532 100644 --- a/include/xen/interface/io/xenbus.h +++ b/include/xen/interface/io/xenbus.h @@ -27,8 +27,14 @@ enum xenbus_state XenbusStateClosing = 5, /* The device is being closed due to an error or an unplug event. */ - XenbusStateClosed = 6 + XenbusStateClosed = 6, + /* + * Reconfiguring: The device is being reconfigured. + */ + XenbusStateReconfiguring = 7, + + XenbusStateReconfigured = 8 }; #endif /* _XEN_PUBLIC_IO_XENBUS_H */ -- cgit v0.10.2 From b78c9512563780d86a178f11b648bcf73b8d87ec Mon Sep 17 00:00:00 2001 From: Noboru Iwamatsu Date: Tue, 13 Oct 2009 17:22:29 -0400 Subject: xenbus: prevent warnings on unhandled enumeration values XenbusStateReconfiguring/XenbusStateReconfigured were introduced by c/s 437, but aren't handled in many switch statements. .. also pulled from the linux-2.6-sparse-tree tree. Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index ab735a6..c4e9d81 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1125,6 +1125,8 @@ static void blkback_changed(struct xenbus_device *dev, case XenbusStateInitialising: case XenbusStateInitWait: case XenbusStateInitialised: + case XenbusStateReconfiguring: + case XenbusStateReconfigured: case XenbusStateUnknown: case XenbusStateClosed: break; diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c index ebb1190..e0c024d 100644 --- a/drivers/input/xen-kbdfront.c +++ b/drivers/input/xen-kbdfront.c @@ -276,6 +276,8 @@ static void xenkbd_backend_changed(struct xenbus_device *dev, switch (backend_state) { case XenbusStateInitialising: case XenbusStateInitialised: + case XenbusStateReconfiguring: + case XenbusStateReconfigured: case XenbusStateUnknown: case XenbusStateClosed: break; diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index b50fedc..cb6e112 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1610,6 +1610,8 @@ static void backend_changed(struct xenbus_device *dev, switch (backend_state) { case XenbusStateInitialising: case XenbusStateInitialised: + case XenbusStateReconfiguring: + case XenbusStateReconfigured: case XenbusStateConnected: case XenbusStateUnknown: case XenbusStateClosed: diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c index 7c7f42a12..428d273 100644 --- a/drivers/video/xen-fbfront.c +++ b/drivers/video/xen-fbfront.c @@ -631,6 +631,8 @@ static void xenfb_backend_changed(struct xenbus_device *dev, switch (backend_state) { case XenbusStateInitialising: case XenbusStateInitialised: + case XenbusStateReconfiguring: + case XenbusStateReconfigured: case XenbusStateUnknown: case XenbusStateClosed: break; -- cgit v0.10.2 From 956a9202cd1220397933a07beda9f96b3df1fa24 Mon Sep 17 00:00:00 2001 From: Ryan Wilson Date: Mon, 2 Aug 2010 21:31:05 -0400 Subject: xen-pcifront: Xen PCI frontend driver. This is a port of the 2.6.18 Xen PCI front driver with fixes to make it build under 2.6.34 and later (for the full list of changes: git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git historic/xen-pcifront-0.1). It also includes the fixes to make it work properly. [v2: Updated Kconfig, removed crud, added Reviewed-by] [v3: Added 'static', fixed grant table leak, redid Kconfig] [v4: Added one more 'static' and removed comments] Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Stefano Stabellini Reviewed-by: Jan Beulich diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 34ef70d..5b1630e 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -40,6 +40,27 @@ config PCI_STUB When in doubt, say N. +config XEN_PCIDEV_FRONTEND + tristate "Xen PCI Frontend" + depends on PCI && X86 && XEN + select HOTPLUG + select PCI_XEN + default y + help + The PCI device frontend driver allows the kernel to import arbitrary + PCI devices from a PCI backend to support PCI driver domains. + +config XEN_PCIDEV_FE_DEBUG + bool "Xen PCI Frontend debugging" + depends on XEN_PCIDEV_FRONTEND && PCI_DEBUG + help + Say Y here if you want the Xen PCI frontend to produce a bunch of debug + messages to the system log. Select this if you are having a + problem with Xen PCI frontend support and want to see more of what is + going on. + + When in doubt, say N. + config HT_IRQ bool "Interrupts on hypertransport devices" default y diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index dc1aa09..d5e2705 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -65,6 +65,8 @@ obj-$(CONFIG_PCI_SYSCALL) += syscall.o obj-$(CONFIG_PCI_STUB) += pci-stub.o +obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o + ifeq ($(CONFIG_PCI_DEBUG),y) EXTRA_CFLAGS += -DDEBUG endif diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c new file mode 100644 index 0000000..a87c498 --- /dev/null +++ b/drivers/pci/xen-pcifront.c @@ -0,0 +1,1148 @@ +/* + * Xen PCI Frontend. + * + * Author: Ryan Wilson + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define INVALID_GRANT_REF (0) +#define INVALID_EVTCHN (-1) + +struct pci_bus_entry { + struct list_head list; + struct pci_bus *bus; +}; + +#define _PDEVB_op_active (0) +#define PDEVB_op_active (1 << (_PDEVB_op_active)) + +struct pcifront_device { + struct xenbus_device *xdev; + struct list_head root_buses; + + int evtchn; + int gnt_ref; + + int irq; + + /* Lock this when doing any operations in sh_info */ + spinlock_t sh_info_lock; + struct xen_pci_sharedinfo *sh_info; + struct work_struct op_work; + unsigned long flags; + +}; + +struct pcifront_sd { + int domain; + struct pcifront_device *pdev; +}; + +static inline struct pcifront_device * +pcifront_get_pdev(struct pcifront_sd *sd) +{ + return sd->pdev; +} + +static inline void pcifront_init_sd(struct pcifront_sd *sd, + unsigned int domain, unsigned int bus, + struct pcifront_device *pdev) +{ + sd->domain = domain; + sd->pdev = pdev; +} + +static DEFINE_SPINLOCK(pcifront_dev_lock); +static struct pcifront_device *pcifront_dev; + +static int verbose_request; +module_param(verbose_request, int, 0644); + +static int errno_to_pcibios_err(int errno) +{ + switch (errno) { + case XEN_PCI_ERR_success: + return PCIBIOS_SUCCESSFUL; + + case XEN_PCI_ERR_dev_not_found: + return PCIBIOS_DEVICE_NOT_FOUND; + + case XEN_PCI_ERR_invalid_offset: + case XEN_PCI_ERR_op_failed: + return PCIBIOS_BAD_REGISTER_NUMBER; + + case XEN_PCI_ERR_not_implemented: + return PCIBIOS_FUNC_NOT_SUPPORTED; + + case XEN_PCI_ERR_access_denied: + return PCIBIOS_SET_FAILED; + } + return errno; +} + +static inline void schedule_pcifront_aer_op(struct pcifront_device *pdev) +{ + if (test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) + && !test_and_set_bit(_PDEVB_op_active, &pdev->flags)) { + dev_dbg(&pdev->xdev->dev, "schedule aer frontend job\n"); + schedule_work(&pdev->op_work); + } +} + +static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op) +{ + int err = 0; + struct xen_pci_op *active_op = &pdev->sh_info->op; + unsigned long irq_flags; + evtchn_port_t port = pdev->evtchn; + unsigned irq = pdev->irq; + s64 ns, ns_timeout; + struct timeval tv; + + spin_lock_irqsave(&pdev->sh_info_lock, irq_flags); + + memcpy(active_op, op, sizeof(struct xen_pci_op)); + + /* Go */ + wmb(); + set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); + notify_remote_via_evtchn(port); + + /* + * We set a poll timeout of 3 seconds but give up on return after + * 2 seconds. It is better to time out too late rather than too early + * (in the latter case we end up continually re-executing poll() with a + * timeout in the past). 1s difference gives plenty of slack for error. + */ + do_gettimeofday(&tv); + ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC; + + xen_clear_irq_pending(irq); + + while (test_bit(_XEN_PCIF_active, + (unsigned long *)&pdev->sh_info->flags)) { + xen_poll_irq_timeout(irq, jiffies + 3*HZ); + xen_clear_irq_pending(irq); + do_gettimeofday(&tv); + ns = timeval_to_ns(&tv); + if (ns > ns_timeout) { + dev_err(&pdev->xdev->dev, + "pciback not responding!!!\n"); + clear_bit(_XEN_PCIF_active, + (unsigned long *)&pdev->sh_info->flags); + err = XEN_PCI_ERR_dev_not_found; + goto out; + } + } + + /* + * We might lose backend service request since we + * reuse same evtchn with pci_conf backend response. So re-schedule + * aer pcifront service. + */ + if (test_bit(_XEN_PCIB_active, + (unsigned long *)&pdev->sh_info->flags)) { + dev_err(&pdev->xdev->dev, + "schedule aer pcifront service\n"); + schedule_pcifront_aer_op(pdev); + } + + memcpy(op, active_op, sizeof(struct xen_pci_op)); + + err = op->err; +out: + spin_unlock_irqrestore(&pdev->sh_info_lock, irq_flags); + return err; +} + +/* Access to this function is spinlocked in drivers/pci/access.c */ +static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + int err = 0; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_conf_read, + .domain = pci_domain_nr(bus), + .bus = bus->number, + .devfn = devfn, + .offset = where, + .size = size, + }; + struct pcifront_sd *sd = bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + if (verbose_request) + dev_info(&pdev->xdev->dev, + "read dev=%04x:%02x:%02x.%01x - offset %x size %d\n", + pci_domain_nr(bus), bus->number, PCI_SLOT(devfn), + PCI_FUNC(devfn), where, size); + + err = do_pci_op(pdev, &op); + + if (likely(!err)) { + if (verbose_request) + dev_info(&pdev->xdev->dev, "read got back value %x\n", + op.value); + + *val = op.value; + } else if (err == -ENODEV) { + /* No device here, pretend that it just returned 0 */ + err = 0; + *val = 0; + } + + return errno_to_pcibios_err(err); +} + +/* Access to this function is spinlocked in drivers/pci/access.c */ +static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_conf_write, + .domain = pci_domain_nr(bus), + .bus = bus->number, + .devfn = devfn, + .offset = where, + .size = size, + .value = val, + }; + struct pcifront_sd *sd = bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + if (verbose_request) + dev_info(&pdev->xdev->dev, + "write dev=%04x:%02x:%02x.%01x - " + "offset %x size %d val %x\n", + pci_domain_nr(bus), bus->number, + PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val); + + return errno_to_pcibios_err(do_pci_op(pdev, &op)); +} + +struct pci_ops pcifront_bus_ops = { + .read = pcifront_bus_read, + .write = pcifront_bus_write, +}; + +#ifdef CONFIG_PCI_MSI +static int pci_frontend_enable_msix(struct pci_dev *dev, + int **vector, int nvec) +{ + int err; + int i; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_enable_msix, + .domain = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn, + .value = nvec, + }; + struct pcifront_sd *sd = dev->bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + struct msi_desc *entry; + + if (nvec > SH_INFO_MAX_VEC) { + dev_err(&dev->dev, "too much vector for pci frontend: %x." + " Increase SH_INFO_MAX_VEC.\n", nvec); + return -EINVAL; + } + + i = 0; + list_for_each_entry(entry, &dev->msi_list, list) { + op.msix_entries[i].entry = entry->msi_attrib.entry_nr; + /* Vector is useless at this point. */ + op.msix_entries[i].vector = -1; + i++; + } + + err = do_pci_op(pdev, &op); + + if (likely(!err)) { + if (likely(!op.value)) { + /* we get the result */ + for (i = 0; i < nvec; i++) + *(*vector+i) = op.msix_entries[i].vector; + return 0; + } else { + printk(KERN_DEBUG "enable msix get value %x\n", + op.value); + return op.value; + } + } else { + dev_err(&dev->dev, "enable msix get err %x\n", err); + return err; + } +} + +static void pci_frontend_disable_msix(struct pci_dev *dev) +{ + int err; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_disable_msix, + .domain = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn, + }; + struct pcifront_sd *sd = dev->bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + err = do_pci_op(pdev, &op); + + /* What should do for error ? */ + if (err) + dev_err(&dev->dev, "pci_disable_msix get err %x\n", err); +} + +static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector) +{ + int err; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_enable_msi, + .domain = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn, + }; + struct pcifront_sd *sd = dev->bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + err = do_pci_op(pdev, &op); + if (likely(!err)) { + *(*vector) = op.value; + } else { + dev_err(&dev->dev, "pci frontend enable msi failed for dev " + "%x:%x\n", op.bus, op.devfn); + err = -EINVAL; + } + return err; +} + +static void pci_frontend_disable_msi(struct pci_dev *dev) +{ + int err; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_disable_msi, + .domain = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn, + }; + struct pcifront_sd *sd = dev->bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + err = do_pci_op(pdev, &op); + if (err == XEN_PCI_ERR_dev_not_found) { + /* XXX No response from backend, what shall we do? */ + printk(KERN_DEBUG "get no response from backend for disable MSI\n"); + return; + } + if (err) + /* how can pciback notify us fail? */ + printk(KERN_DEBUG "get fake response frombackend\n"); +} + +static struct xen_pci_frontend_ops pci_frontend_ops = { + .enable_msi = pci_frontend_enable_msi, + .disable_msi = pci_frontend_disable_msi, + .enable_msix = pci_frontend_enable_msix, + .disable_msix = pci_frontend_disable_msix, +}; + +static void pci_frontend_registrar(int enable) +{ + if (enable) + xen_pci_frontend = &pci_frontend_ops; + else + xen_pci_frontend = NULL; +}; +#else +static inline void pci_frontend_registrar(int enable) { }; +#endif /* CONFIG_PCI_MSI */ + +/* Claim resources for the PCI frontend as-is, backend won't allow changes */ +static int pcifront_claim_resource(struct pci_dev *dev, void *data) +{ + struct pcifront_device *pdev = data; + int i; + struct resource *r; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + r = &dev->resource[i]; + + if (!r->parent && r->start && r->flags) { + dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n", + pci_name(dev), i); + if (pci_claim_resource(dev, i)) { + dev_err(&pdev->xdev->dev, "Could not claim " + "resource %s/%d! Device offline. Try " + "giving less than 4GB to domain.\n", + pci_name(dev), i); + } + } + } + + return 0; +} + +static int __devinit pcifront_scan_bus(struct pcifront_device *pdev, + unsigned int domain, unsigned int bus, + struct pci_bus *b) +{ + struct pci_dev *d; + unsigned int devfn; + + /* Scan the bus for functions and add. + * We omit handling of PCI bridge attachment because pciback prevents + * bridges from being exported. + */ + for (devfn = 0; devfn < 0x100; devfn++) { + d = pci_get_slot(b, devfn); + if (d) { + /* Device is already known. */ + pci_dev_put(d); + continue; + } + + d = pci_scan_single_device(b, devfn); + if (d) + dev_info(&pdev->xdev->dev, "New device on " + "%04x:%02x:%02x.%02x found.\n", domain, bus, + PCI_SLOT(devfn), PCI_FUNC(devfn)); + } + + return 0; +} + +static int __devinit pcifront_scan_root(struct pcifront_device *pdev, + unsigned int domain, unsigned int bus) +{ + struct pci_bus *b; + struct pcifront_sd *sd = NULL; + struct pci_bus_entry *bus_entry = NULL; + int err = 0; + +#ifndef CONFIG_PCI_DOMAINS + if (domain != 0) { + dev_err(&pdev->xdev->dev, + "PCI Root in non-zero PCI Domain! domain=%d\n", domain); + dev_err(&pdev->xdev->dev, + "Please compile with CONFIG_PCI_DOMAINS\n"); + err = -EINVAL; + goto err_out; + } +#endif + + dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n", + domain, bus); + + bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL); + sd = kmalloc(sizeof(*sd), GFP_KERNEL); + if (!bus_entry || !sd) { + err = -ENOMEM; + goto err_out; + } + pcifront_init_sd(sd, domain, bus, pdev); + + b = pci_scan_bus_parented(&pdev->xdev->dev, bus, + &pcifront_bus_ops, sd); + if (!b) { + dev_err(&pdev->xdev->dev, + "Error creating PCI Frontend Bus!\n"); + err = -ENOMEM; + goto err_out; + } + + bus_entry->bus = b; + + list_add(&bus_entry->list, &pdev->root_buses); + + /* pci_scan_bus_parented skips devices which do not have a have + * devfn==0. The pcifront_scan_bus enumerates all devfn. */ + err = pcifront_scan_bus(pdev, domain, bus, b); + + /* Claim resources before going "live" with our devices */ + pci_walk_bus(b, pcifront_claim_resource, pdev); + + /* Create SysFS and notify udev of the devices. Aka: "going live" */ + pci_bus_add_devices(b); + + return err; + +err_out: + kfree(bus_entry); + kfree(sd); + + return err; +} + +static int __devinit pcifront_rescan_root(struct pcifront_device *pdev, + unsigned int domain, unsigned int bus) +{ + int err; + struct pci_bus *b; + +#ifndef CONFIG_PCI_DOMAINS + if (domain != 0) { + dev_err(&pdev->xdev->dev, + "PCI Root in non-zero PCI Domain! domain=%d\n", domain); + dev_err(&pdev->xdev->dev, + "Please compile with CONFIG_PCI_DOMAINS\n"); + return -EINVAL; + } +#endif + + dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n", + domain, bus); + + b = pci_find_bus(domain, bus); + if (!b) + /* If the bus is unknown, create it. */ + return pcifront_scan_root(pdev, domain, bus); + + err = pcifront_scan_bus(pdev, domain, bus, b); + + /* Claim resources before going "live" with our devices */ + pci_walk_bus(b, pcifront_claim_resource, pdev); + + /* Create SysFS and notify udev of the devices. Aka: "going live" */ + pci_bus_add_devices(b); + + return err; +} + +static void free_root_bus_devs(struct pci_bus *bus) +{ + struct pci_dev *dev; + + while (!list_empty(&bus->devices)) { + dev = container_of(bus->devices.next, struct pci_dev, + bus_list); + dev_dbg(&dev->dev, "removing device\n"); + pci_remove_bus_device(dev); + } +} + +static void pcifront_free_roots(struct pcifront_device *pdev) +{ + struct pci_bus_entry *bus_entry, *t; + + dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n"); + + list_for_each_entry_safe(bus_entry, t, &pdev->root_buses, list) { + list_del(&bus_entry->list); + + free_root_bus_devs(bus_entry->bus); + + kfree(bus_entry->bus->sysdata); + + device_unregister(bus_entry->bus->bridge); + pci_remove_bus(bus_entry->bus); + + kfree(bus_entry); + } +} + +static pci_ers_result_t pcifront_common_process(int cmd, + struct pcifront_device *pdev, + pci_channel_state_t state) +{ + pci_ers_result_t result; + struct pci_driver *pdrv; + int bus = pdev->sh_info->aer_op.bus; + int devfn = pdev->sh_info->aer_op.devfn; + struct pci_dev *pcidev; + int flag = 0; + + dev_dbg(&pdev->xdev->dev, + "pcifront AER process: cmd %x (bus:%x, devfn%x)", + cmd, bus, devfn); + result = PCI_ERS_RESULT_NONE; + + pcidev = pci_get_bus_and_slot(bus, devfn); + if (!pcidev || !pcidev->driver) { + dev_err(&pcidev->dev, + "device or driver is NULL\n"); + return result; + } + pdrv = pcidev->driver; + + if (get_driver(&pdrv->driver)) { + if (pdrv->err_handler && pdrv->err_handler->error_detected) { + dev_dbg(&pcidev->dev, + "trying to call AER service\n"); + if (pcidev) { + flag = 1; + switch (cmd) { + case XEN_PCI_OP_aer_detected: + result = pdrv->err_handler-> + error_detected(pcidev, state); + break; + case XEN_PCI_OP_aer_mmio: + result = pdrv->err_handler-> + mmio_enabled(pcidev); + break; + case XEN_PCI_OP_aer_slotreset: + result = pdrv->err_handler-> + slot_reset(pcidev); + break; + case XEN_PCI_OP_aer_resume: + pdrv->err_handler->resume(pcidev); + break; + default: + dev_err(&pdev->xdev->dev, + "bad request in aer recovery " + "operation!\n"); + + } + } + } + put_driver(&pdrv->driver); + } + if (!flag) + result = PCI_ERS_RESULT_NONE; + + return result; +} + + +static void pcifront_do_aer(struct work_struct *data) +{ + struct pcifront_device *pdev = + container_of(data, struct pcifront_device, op_work); + int cmd = pdev->sh_info->aer_op.cmd; + pci_channel_state_t state = + (pci_channel_state_t)pdev->sh_info->aer_op.err; + + /*If a pci_conf op is in progress, + we have to wait until it is done before service aer op*/ + dev_dbg(&pdev->xdev->dev, + "pcifront service aer bus %x devfn %x\n", + pdev->sh_info->aer_op.bus, pdev->sh_info->aer_op.devfn); + + pdev->sh_info->aer_op.err = pcifront_common_process(cmd, pdev, state); + + /* Post the operation to the guest. */ + wmb(); + clear_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags); + notify_remote_via_evtchn(pdev->evtchn); + + /*in case of we lost an aer request in four lines time_window*/ + smp_mb__before_clear_bit(); + clear_bit(_PDEVB_op_active, &pdev->flags); + smp_mb__after_clear_bit(); + + schedule_pcifront_aer_op(pdev); + +} + +static irqreturn_t pcifront_handler_aer(int irq, void *dev) +{ + struct pcifront_device *pdev = dev; + schedule_pcifront_aer_op(pdev); + return IRQ_HANDLED; +} +static int pcifront_connect(struct pcifront_device *pdev) +{ + int err = 0; + + spin_lock(&pcifront_dev_lock); + + if (!pcifront_dev) { + dev_info(&pdev->xdev->dev, "Installing PCI frontend\n"); + pcifront_dev = pdev; + } else { + dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n"); + err = -EEXIST; + } + + spin_unlock(&pcifront_dev_lock); + + return err; +} + +static void pcifront_disconnect(struct pcifront_device *pdev) +{ + spin_lock(&pcifront_dev_lock); + + if (pdev == pcifront_dev) { + dev_info(&pdev->xdev->dev, + "Disconnecting PCI Frontend Buses\n"); + pcifront_dev = NULL; + } + + spin_unlock(&pcifront_dev_lock); +} +static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev) +{ + struct pcifront_device *pdev; + + pdev = kzalloc(sizeof(struct pcifront_device), GFP_KERNEL); + if (pdev == NULL) + goto out; + + pdev->sh_info = + (struct xen_pci_sharedinfo *)__get_free_page(GFP_KERNEL); + if (pdev->sh_info == NULL) { + kfree(pdev); + pdev = NULL; + goto out; + } + pdev->sh_info->flags = 0; + + /*Flag for registering PV AER handler*/ + set_bit(_XEN_PCIB_AERHANDLER, (void *)&pdev->sh_info->flags); + + dev_set_drvdata(&xdev->dev, pdev); + pdev->xdev = xdev; + + INIT_LIST_HEAD(&pdev->root_buses); + + spin_lock_init(&pdev->sh_info_lock); + + pdev->evtchn = INVALID_EVTCHN; + pdev->gnt_ref = INVALID_GRANT_REF; + pdev->irq = -1; + + INIT_WORK(&pdev->op_work, pcifront_do_aer); + + dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n", + pdev, pdev->sh_info); +out: + return pdev; +} + +static void free_pdev(struct pcifront_device *pdev) +{ + dev_dbg(&pdev->xdev->dev, "freeing pdev @ 0x%p\n", pdev); + + pcifront_free_roots(pdev); + + /*For PCIE_AER error handling job*/ + flush_scheduled_work(); + + if (pdev->irq >= 0) + unbind_from_irqhandler(pdev->irq, pdev); + + if (pdev->evtchn != INVALID_EVTCHN) + xenbus_free_evtchn(pdev->xdev, pdev->evtchn); + + if (pdev->gnt_ref != INVALID_GRANT_REF) + gnttab_end_foreign_access(pdev->gnt_ref, 0 /* r/w page */, + (unsigned long)pdev->sh_info); + else + free_page((unsigned long)pdev->sh_info); + + dev_set_drvdata(&pdev->xdev->dev, NULL); + + kfree(pdev); +} + +static int pcifront_publish_info(struct pcifront_device *pdev) +{ + int err = 0; + struct xenbus_transaction trans; + + err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info)); + if (err < 0) + goto out; + + pdev->gnt_ref = err; + + err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn); + if (err) + goto out; + + err = bind_evtchn_to_irqhandler(pdev->evtchn, pcifront_handler_aer, + 0, "pcifront", pdev); + + if (err < 0) + return err; + + pdev->irq = err; + +do_publish: + err = xenbus_transaction_start(&trans); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error writing configuration for backend " + "(start transaction)"); + goto out; + } + + err = xenbus_printf(trans, pdev->xdev->nodename, + "pci-op-ref", "%u", pdev->gnt_ref); + if (!err) + err = xenbus_printf(trans, pdev->xdev->nodename, + "event-channel", "%u", pdev->evtchn); + if (!err) + err = xenbus_printf(trans, pdev->xdev->nodename, + "magic", XEN_PCI_MAGIC); + + if (err) { + xenbus_transaction_end(trans, 1); + xenbus_dev_fatal(pdev->xdev, err, + "Error writing configuration for backend"); + goto out; + } else { + err = xenbus_transaction_end(trans, 0); + if (err == -EAGAIN) + goto do_publish; + else if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error completing transaction " + "for backend"); + goto out; + } + } + + xenbus_switch_state(pdev->xdev, XenbusStateInitialised); + + dev_dbg(&pdev->xdev->dev, "publishing successful!\n"); + +out: + return err; +} + +static int __devinit pcifront_try_connect(struct pcifront_device *pdev) +{ + int err = -EFAULT; + int i, num_roots, len; + char str[64]; + unsigned int domain, bus; + + + /* Only connect once */ + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateInitialised) + goto out; + + err = pcifront_connect(pdev); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error connecting PCI Frontend"); + goto out; + } + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, + "root_num", "%d", &num_roots); + if (err == -ENOENT) { + xenbus_dev_error(pdev->xdev, err, + "No PCI Roots found, trying 0000:00"); + err = pcifront_scan_root(pdev, 0, 0); + num_roots = 0; + } else if (err != 1) { + if (err == 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading number of PCI roots"); + goto out; + } + + for (i = 0; i < num_roots; i++) { + len = snprintf(str, sizeof(str), "root-%d", i); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, + "%x:%x", &domain, &bus); + if (err != 2) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading PCI root %d", i); + goto out; + } + + err = pcifront_scan_root(pdev, domain, bus); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error scanning PCI root %04x:%02x", + domain, bus); + goto out; + } + } + + err = xenbus_switch_state(pdev->xdev, XenbusStateConnected); + +out: + return err; +} + +static int pcifront_try_disconnect(struct pcifront_device *pdev) +{ + int err = 0; + enum xenbus_state prev_state; + + + prev_state = xenbus_read_driver_state(pdev->xdev->nodename); + + if (prev_state >= XenbusStateClosing) + goto out; + + if (prev_state == XenbusStateConnected) { + pcifront_free_roots(pdev); + pcifront_disconnect(pdev); + } + + err = xenbus_switch_state(pdev->xdev, XenbusStateClosed); + +out: + + return err; +} + +static int __devinit pcifront_attach_devices(struct pcifront_device *pdev) +{ + int err = -EFAULT; + int i, num_roots, len; + unsigned int domain, bus; + char str[64]; + + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateReconfiguring) + goto out; + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, + "root_num", "%d", &num_roots); + if (err == -ENOENT) { + xenbus_dev_error(pdev->xdev, err, + "No PCI Roots found, trying 0000:00"); + err = pcifront_rescan_root(pdev, 0, 0); + num_roots = 0; + } else if (err != 1) { + if (err == 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading number of PCI roots"); + goto out; + } + + for (i = 0; i < num_roots; i++) { + len = snprintf(str, sizeof(str), "root-%d", i); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, + "%x:%x", &domain, &bus); + if (err != 2) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading PCI root %d", i); + goto out; + } + + err = pcifront_rescan_root(pdev, domain, bus); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error scanning PCI root %04x:%02x", + domain, bus); + goto out; + } + } + + xenbus_switch_state(pdev->xdev, XenbusStateConnected); + +out: + return err; +} + +static int pcifront_detach_devices(struct pcifront_device *pdev) +{ + int err = 0; + int i, num_devs; + unsigned int domain, bus, slot, func; + struct pci_bus *pci_bus; + struct pci_dev *pci_dev; + char str[64]; + + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateConnected) + goto out; + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "num_devs", "%d", + &num_devs); + if (err != 1) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading number of PCI devices"); + goto out; + } + + /* Find devices being detached and remove them. */ + for (i = 0; i < num_devs; i++) { + int l, state; + l = snprintf(str, sizeof(str), "state-%d", i); + if (unlikely(l >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, "%d", + &state); + if (err != 1) + state = XenbusStateUnknown; + + if (state != XenbusStateClosing) + continue; + + /* Remove device. */ + l = snprintf(str, sizeof(str), "vdev-%d", i); + if (unlikely(l >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, + "%x:%x:%x.%x", &domain, &bus, &slot, &func); + if (err != 4) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading PCI device %d", i); + goto out; + } + + pci_bus = pci_find_bus(domain, bus); + if (!pci_bus) { + dev_dbg(&pdev->xdev->dev, "Cannot get bus %04x:%02x\n", + domain, bus); + continue; + } + pci_dev = pci_get_slot(pci_bus, PCI_DEVFN(slot, func)); + if (!pci_dev) { + dev_dbg(&pdev->xdev->dev, + "Cannot get PCI device %04x:%02x:%02x.%02x\n", + domain, bus, slot, func); + continue; + } + pci_remove_bus_device(pci_dev); + pci_dev_put(pci_dev); + + dev_dbg(&pdev->xdev->dev, + "PCI device %04x:%02x:%02x.%02x removed.\n", + domain, bus, slot, func); + } + + err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring); + +out: + return err; +} + +static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev, + enum xenbus_state be_state) +{ + struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev); + + switch (be_state) { + case XenbusStateUnknown: + case XenbusStateInitialising: + case XenbusStateInitWait: + case XenbusStateInitialised: + case XenbusStateClosed: + break; + + case XenbusStateConnected: + pcifront_try_connect(pdev); + break; + + case XenbusStateClosing: + dev_warn(&xdev->dev, "backend going away!\n"); + pcifront_try_disconnect(pdev); + break; + + case XenbusStateReconfiguring: + pcifront_detach_devices(pdev); + break; + + case XenbusStateReconfigured: + pcifront_attach_devices(pdev); + break; + } +} + +static int pcifront_xenbus_probe(struct xenbus_device *xdev, + const struct xenbus_device_id *id) +{ + int err = 0; + struct pcifront_device *pdev = alloc_pdev(xdev); + + if (pdev == NULL) { + err = -ENOMEM; + xenbus_dev_fatal(xdev, err, + "Error allocating pcifront_device struct"); + goto out; + } + + err = pcifront_publish_info(pdev); + if (err) + free_pdev(pdev); + +out: + return err; +} + +static int pcifront_xenbus_remove(struct xenbus_device *xdev) +{ + struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev); + if (pdev) + free_pdev(pdev); + + return 0; +} + +static const struct xenbus_device_id xenpci_ids[] = { + {"pci"}, + {""}, +}; + +static struct xenbus_driver xenbus_pcifront_driver = { + .name = "pcifront", + .owner = THIS_MODULE, + .ids = xenpci_ids, + .probe = pcifront_xenbus_probe, + .remove = pcifront_xenbus_remove, + .otherend_changed = pcifront_backend_changed, +}; + +static int __init pcifront_init(void) +{ + if (!xen_pv_domain() || xen_initial_domain()) + return -ENODEV; + + pci_frontend_registrar(1 /* enable */); + + return xenbus_register_frontend(&xenbus_pcifront_driver); +} + +static void __exit pcifront_cleanup(void) +{ + xenbus_unregister_driver(&xenbus_pcifront_driver); + pci_frontend_registrar(0 /* disable */); +} +module_init(pcifront_init); +module_exit(pcifront_cleanup); + +MODULE_DESCRIPTION("Xen PCI passthrough frontend."); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("xen:pci"); diff --git a/include/xen/interface/io/pciif.h b/include/xen/interface/io/pciif.h new file mode 100644 index 0000000..d9922ae --- /dev/null +++ b/include/xen/interface/io/pciif.h @@ -0,0 +1,112 @@ +/* + * PCI Backend/Frontend Common Data Structures & Macros + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Ryan Wilson + */ +#ifndef __XEN_PCI_COMMON_H__ +#define __XEN_PCI_COMMON_H__ + +/* Be sure to bump this number if you change this file */ +#define XEN_PCI_MAGIC "7" + +/* xen_pci_sharedinfo flags */ +#define _XEN_PCIF_active (0) +#define XEN_PCIF_active (1<<_XEN_PCIF_active) +#define _XEN_PCIB_AERHANDLER (1) +#define XEN_PCIB_AERHANDLER (1<<_XEN_PCIB_AERHANDLER) +#define _XEN_PCIB_active (2) +#define XEN_PCIB_active (1<<_XEN_PCIB_active) + +/* xen_pci_op commands */ +#define XEN_PCI_OP_conf_read (0) +#define XEN_PCI_OP_conf_write (1) +#define XEN_PCI_OP_enable_msi (2) +#define XEN_PCI_OP_disable_msi (3) +#define XEN_PCI_OP_enable_msix (4) +#define XEN_PCI_OP_disable_msix (5) +#define XEN_PCI_OP_aer_detected (6) +#define XEN_PCI_OP_aer_resume (7) +#define XEN_PCI_OP_aer_mmio (8) +#define XEN_PCI_OP_aer_slotreset (9) + +/* xen_pci_op error numbers */ +#define XEN_PCI_ERR_success (0) +#define XEN_PCI_ERR_dev_not_found (-1) +#define XEN_PCI_ERR_invalid_offset (-2) +#define XEN_PCI_ERR_access_denied (-3) +#define XEN_PCI_ERR_not_implemented (-4) +/* XEN_PCI_ERR_op_failed - backend failed to complete the operation */ +#define XEN_PCI_ERR_op_failed (-5) + +/* + * it should be PAGE_SIZE-sizeof(struct xen_pci_op))/sizeof(struct msix_entry)) + * Should not exceed 128 + */ +#define SH_INFO_MAX_VEC 128 + +struct xen_msix_entry { + uint16_t vector; + uint16_t entry; +}; +struct xen_pci_op { + /* IN: what action to perform: XEN_PCI_OP_* */ + uint32_t cmd; + + /* OUT: will contain an error number (if any) from errno.h */ + int32_t err; + + /* IN: which device to touch */ + uint32_t domain; /* PCI Domain/Segment */ + uint32_t bus; + uint32_t devfn; + + /* IN: which configuration registers to touch */ + int32_t offset; + int32_t size; + + /* IN/OUT: Contains the result after a READ or the value to WRITE */ + uint32_t value; + /* IN: Contains extra infor for this operation */ + uint32_t info; + /*IN: param for msi-x */ + struct xen_msix_entry msix_entries[SH_INFO_MAX_VEC]; +}; + +/*used for pcie aer handling*/ +struct xen_pcie_aer_op { + /* IN: what action to perform: XEN_PCI_OP_* */ + uint32_t cmd; + /*IN/OUT: return aer_op result or carry error_detected state as input*/ + int32_t err; + + /* IN: which device to touch */ + uint32_t domain; /* PCI Domain/Segment*/ + uint32_t bus; + uint32_t devfn; +}; +struct xen_pci_sharedinfo { + /* flags - XEN_PCIF_* */ + uint32_t flags; + struct xen_pci_op op; + struct xen_pcie_aer_op aer_op; +}; + +#endif /* __XEN_PCI_COMMON_H__ */ -- cgit v0.10.2 From 74226b8c8a0b10841129916191205095af928da5 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 19 Aug 2010 13:34:58 -0400 Subject: xen/pci: Request ACS when Xen-SWIOTLB is activated. It used to done in the Xen startup code but that is not really appropiate. [v2: Update Kconfig with PCI requirement] Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index a013ec9..be4d80a 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -1,6 +1,7 @@ /* Glue code to lib/swiotlb-xen.c */ #include +#include #include #include @@ -54,5 +55,8 @@ void __init pci_xen_swiotlb_init(void) if (xen_swiotlb) { xen_swiotlb_init(1); dma_ops = &xen_swiotlb_dma_ops; + + /* Make sure ACS will be enabled */ + pci_request_acs(); } } diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 60d71e9..f70a627 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -74,6 +74,6 @@ config XEN_PLATFORM_PCI config SWIOTLB_XEN def_bool y - depends on SWIOTLB + depends on PCI && SWIOTLB endmenu -- cgit v0.10.2 From c5f8e29d4cf3b29fe58df90f51676d812dfd16bd Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 4 Oct 2010 13:32:26 -0400 Subject: MAINTAINERS: Add myself for Xen PCI and Xen SWIOTLB maintainer. Signed-off-by: Konrad Rzeszutek Wilk diff --git a/MAINTAINERS b/MAINTAINERS index 62c0ace..dd8bbe0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6492,6 +6492,20 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mjg59/platform-drivers-x86. S: Maintained F: drivers/platform/x86 +XEN PCI SUBSYSTEM +M: Konrad Rzeszutek Wilk +L: xen-devel@lists.xensource.com +S: Supported +F: arch/x86/pci/*xen* +F: drivers/pci/*xen* + +XEN SWIOTLB SUBSYSTEM +M: Konrad Rzeszutek Wilk +L: xen-devel@lists.xensource.com +S: Supported +F: arch/x86/xen/*swiotlb* +F: drivers/xen/*swiotlb* + XEN HYPERVISOR INTERFACE M: Jeremy Fitzhardinge M: Chris Wright -- cgit v0.10.2 From 2775609c5d9a9d40ad5f101e23438ceacee4250b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 8 Oct 2010 11:06:20 -0400 Subject: swiotlb-xen: On x86-32 builts, select SWIOTLB instead of depending on it. We used to depend on CONFIG_SWIOTLB, but that is disabled by default. So when compiling we get this compile error: arch/x86/xen/pci-swiotlb-xen.c: In function 'pci_xen_swiotlb_detect': arch/x86/xen/pci-swiotlb-xen.c:48: error: lvalue required as left operand of assignment Fix it by actually activating the SWIOTLB library. Reported-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index f70a627..6e6180c 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -74,6 +74,7 @@ config XEN_PLATFORM_PCI config SWIOTLB_XEN def_bool y - depends on PCI && SWIOTLB + depends on PCI + select SWIOTLB endmenu -- cgit v0.10.2 From 2c52f8d3f787ec8e39022da7d57b9f4f482ad2d0 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 18 Oct 2010 17:11:10 -0400 Subject: x86: xen: Sanitse irq handling (part two) Thomas Gleixner cleaned up event handling to use the sparse_irq handling, but the xen-pcifront patches utilized the old mechanism. This fixes them to work with sparse_irq handling. Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 7016a73..3df53de 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -586,8 +586,7 @@ int xen_allocate_pirq(unsigned gsi, int shareable, char *name) * we are using the !xen_initial_domain() to drop in the function.*/ if (identity_mapped_irq(gsi) || !xen_initial_domain()) { irq = gsi; - irq_to_desc_alloc_node(irq, 0); - dynamic_irq_init(irq); + irq_alloc_desc_at(irq, 0); } else irq = find_unbound_irq(); @@ -602,7 +601,7 @@ int xen_allocate_pirq(unsigned gsi, int shareable, char *name) * this in the priv domain. */ if (xen_initial_domain() && HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { - dynamic_irq_cleanup(irq); + irq_free_desc(irq); irq = -ENOSPC; goto out; } @@ -629,7 +628,7 @@ int xen_destroy_irq(int irq) irq_info[irq] = mk_unbound_info(); - dynamic_irq_cleanup(irq); + irq_free_desc(irq); out: spin_unlock(&irq_mapping_update_lock); -- cgit v0.10.2 From 31f9783f48388aacf7f791d607b16d36e63a2b79 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Oct 2010 11:34:16 -0400 Subject: MAINTAINERS: Add myself to the Xen Hypervisor Interface and remove Chris Wright. Chris is working on other stuff now, and I am working full-time with Jeremy on these bits. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Jeremy Fitzhardinge Acked-by: Chris Wright diff --git a/MAINTAINERS b/MAINTAINERS index dd8bbe0..59e9999 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6507,10 +6507,10 @@ F: arch/x86/xen/*swiotlb* F: drivers/xen/*swiotlb* XEN HYPERVISOR INTERFACE -M: Jeremy Fitzhardinge -M: Chris Wright +M: Jeremy Fitzhardinge +M: Konrad Rzeszutek Wilk +L: xen-devel@lists.xen.org L: virtualization@lists.osdl.org -L: xen-devel@lists.xensource.com S: Supported F: arch/x86/xen/ F: drivers/*/xen-*front.c -- cgit v0.10.2 From 2d7d06dd8ffcbafc03bf2c1cb4b2fb2c4c405ec1 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Oct 2010 13:04:13 -0400 Subject: xen: Update Makefile with CONFIG_BLOCK dependency for biomerge.c Without this dependency we get these compile errors: linux-next-20101020/drivers/xen/biomerge.c: In function 'xen_biovec_phys_mergeable': linux-next-20101020/drivers/xen/biomerge.c:8: error: dereferencing pointer to incomplete type linux-next-20101020/drivers/xen/biomerge.c:9: error: dereferencing pointer to incomplete type linux-next-20101020/drivers/xen/biomerge.c:11: error: implicit declaration of function '__BIOVEC_PHYS_MERGEABLE' Signed-off-by: Konrad Rzeszutek Wilk Reported-by: Randy Dunlap diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index b47f5da..b978645 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,9 +1,10 @@ -obj-y += grant-table.o features.o events.o manage.o biomerge.o +obj-y += grant-table.o features.o events.o manage.o obj-y += xenbus/ nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_features.o := $(nostackp) +obj-$(CONFIG_BLOCK) += biomerge.o obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += balloon.o -- cgit v0.10.2 From 5bba6c56dc99ff88f79a79572e29ecf445710878 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 21 Oct 2010 09:36:07 -0400 Subject: X86/PCI: Remove the dependency on isapnp_disable. This looks to be vestigial dependency that had never been used even in the original code base (2.6.18) from which this driver was up-ported. Without this fix, with the CONFIG_ISAPNP, we get this compile failure: arch/x86/pci/xen.c: In function 'pci_xen_init': arch/x86/pci/xen.c:138: error: 'isapnp_disable' undeclared (first use in this function) arch/x86/pci/xen.c:138: error: (Each undeclared identifier is reported only once arch/x86/pci/xen.c:138: error: for each function it appears in.) Reported-by: Li Zefan Tested-by: Li Zefan Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index b19c873..4e37106 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -133,11 +133,6 @@ int __init pci_xen_init(void) acpi_noirq = 1; #endif -#ifdef CONFIG_ISAPNP - /* Stop isapnp from probing */ - isapnp_disable = 1; -#endif - #ifdef CONFIG_PCI_MSI x86_msi.setup_msi_irqs = xen_setup_msi_irqs; x86_msi.teardown_msi_irq = xen_teardown_msi_irq; -- cgit v0.10.2 From 7a043f119c0e4b460306f868d9638ac55c6afa6f Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 1 Jul 2010 17:08:14 +0100 Subject: xen: support pirq != irq PHYSDEVOP_map_pirq might return a pirq different from what we asked if we are running as an HVM guest, so we need to be able to support pirqs that are different from linux irqs. Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 3df53de..018a962 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -90,6 +90,7 @@ struct irq_info unsigned short virq; enum ipi_vector ipi; struct { + unsigned short pirq; unsigned short gsi; unsigned char vector; unsigned char flags; @@ -100,6 +101,7 @@ struct irq_info #define PIRQ_SHAREABLE (1 << 1) static struct irq_info *irq_info; +static int *pirq_to_irq; static int *evtchn_to_irq; struct cpu_evtchn_s { @@ -147,11 +149,12 @@ static struct irq_info mk_virq_info(unsigned short evtchn, unsigned short virq) .cpu = 0, .u.virq = virq }; } -static struct irq_info mk_pirq_info(unsigned short evtchn, +static struct irq_info mk_pirq_info(unsigned short evtchn, unsigned short pirq, unsigned short gsi, unsigned short vector) { return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn, - .cpu = 0, .u.pirq = { .gsi = gsi, .vector = vector } }; + .cpu = 0, + .u.pirq = { .pirq = pirq, .gsi = gsi, .vector = vector } }; } /* @@ -193,6 +196,16 @@ static unsigned virq_from_irq(unsigned irq) return info->u.virq; } +static unsigned pirq_from_irq(unsigned irq) +{ + struct irq_info *info = info_for_irq(irq); + + BUG_ON(info == NULL); + BUG_ON(info->type != IRQT_PIRQ); + + return info->u.pirq.pirq; +} + static unsigned gsi_from_irq(unsigned irq) { struct irq_info *info = info_for_irq(irq); @@ -365,6 +378,16 @@ static int get_nr_hw_irqs(void) return ret; } +static int find_unbound_pirq(void) +{ + int i; + for (i = 0; i < nr_irqs; i++) { + if (pirq_to_irq[i] < 0) + return i; + } + return -1; +} + static int find_unbound_irq(void) { struct irq_data *data; @@ -410,7 +433,7 @@ static bool identity_mapped_irq(unsigned irq) static void pirq_unmask_notify(int irq) { - struct physdev_eoi eoi = { .irq = irq }; + struct physdev_eoi eoi = { .irq = pirq_from_irq(irq) }; if (unlikely(pirq_needs_eoi(irq))) { int rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); @@ -425,7 +448,7 @@ static void pirq_query_unmask(int irq) BUG_ON(info->type != IRQT_PIRQ); - irq_status.irq = irq; + irq_status.irq = pirq_from_irq(irq); if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) irq_status.flags = 0; @@ -453,7 +476,7 @@ static unsigned int startup_pirq(unsigned int irq) if (VALID_EVTCHN(evtchn)) goto out; - bind_pirq.pirq = irq; + bind_pirq.pirq = pirq_from_irq(irq); /* NB. We are happy to share unless we are probing. */ bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ? BIND_PIRQ__WILL_SHARE : 0; @@ -556,28 +579,32 @@ static int find_irq_by_gsi(unsigned gsi) return -1; } -/* xen_allocate_irq might allocate irqs from the top down, as a +int xen_allocate_pirq(unsigned gsi, int shareable, char *name) +{ + return xen_map_pirq_gsi(gsi, gsi, shareable, name); +} + +/* xen_map_pirq_gsi might allocate irqs from the top down, as a * consequence don't assume that the irq number returned has a low value * or can be used as a pirq number unless you know otherwise. * - * One notable exception is when xen_allocate_irq is called passing an + * One notable exception is when xen_map_pirq_gsi is called passing an * hardware gsi as argument, in that case the irq number returned - * matches the gsi number passed as first argument. - - * Note: We don't assign an - * event channel until the irq actually started up. Return an - * existing irq if we've already got one for the gsi. + * matches the gsi number passed as second argument. + * + * Note: We don't assign an event channel until the irq actually started + * up. Return an existing irq if we've already got one for the gsi. */ -int xen_allocate_pirq(unsigned gsi, int shareable, char *name) +int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name) { - int irq; + int irq = 0; struct physdev_irq irq_op; spin_lock(&irq_mapping_update_lock); irq = find_irq_by_gsi(gsi); if (irq != -1) { - printk(KERN_INFO "xen_allocate_pirq: returning irq %d for gsi %u\n", + printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n", irq, gsi); goto out; /* XXX need refcount? */ } @@ -606,8 +633,9 @@ int xen_allocate_pirq(unsigned gsi, int shareable, char *name) goto out; } - irq_info[irq] = mk_pirq_info(0, gsi, irq_op.vector); + irq_info[irq] = mk_pirq_info(0, pirq, gsi, irq_op.vector); irq_info[irq].u.pirq.flags |= shareable ? PIRQ_SHAREABLE : 0; + pirq_to_irq[pirq] = irq; out: spin_unlock(&irq_mapping_update_lock); @@ -1327,6 +1355,10 @@ void __init xen_init_IRQ(void) GFP_KERNEL); irq_info = kcalloc(nr_irqs, sizeof(*irq_info), GFP_KERNEL); + pirq_to_irq = kcalloc(nr_irqs, sizeof(*pirq_to_irq), GFP_KERNEL); + for (i = 0; i < nr_irqs; i++) + pirq_to_irq[i] = -1; + evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), GFP_KERNEL); for (i = 0; i < NR_EVENT_CHANNELS; i++) diff --git a/include/xen/events.h b/include/xen/events.h index c1717ca..deec8fa 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -71,6 +71,7 @@ void xen_hvm_evtchn_do_upcall(void); * GSIs are identity mapped; others are dynamically allocated as * usual. */ int xen_allocate_pirq(unsigned gsi, int shareable, char *name); +int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name); /* De-allocates the above mentioned physical interrupt. */ int xen_destroy_irq(int irq); -- cgit v0.10.2 From 01557baff6e9c371d4c96e01089dca32cf347500 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 20 Aug 2010 14:46:52 +0100 Subject: xen: get the maximum number of pirqs from xen Use PHYSDEVOP_get_nr_pirqs to get the maximum number of pirqs from xen. Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 018a962..07e56e5 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -102,6 +102,7 @@ struct irq_info static struct irq_info *irq_info; static int *pirq_to_irq; +static int nr_pirqs; static int *evtchn_to_irq; struct cpu_evtchn_s { @@ -378,10 +379,12 @@ static int get_nr_hw_irqs(void) return ret; } +/* callers of this function should make sure that PHYSDEVOP_get_nr_pirqs + * succeeded otherwise nr_pirqs won't hold the right value */ static int find_unbound_pirq(void) { int i; - for (i = 0; i < nr_irqs; i++) { + for (i = nr_pirqs-1; i >= 0; i--) { if (pirq_to_irq[i] < 0) return i; } @@ -602,6 +605,13 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name) spin_lock(&irq_mapping_update_lock); + if ((pirq > nr_pirqs) || (gsi > nr_irqs)) { + printk(KERN_WARNING "xen_map_pirq_gsi: %s %s is incorrect!\n", + pirq > nr_pirqs ? "nr_pirqs" :"", + gsi > nr_irqs ? "nr_irqs" : ""); + goto out; + } + irq = find_irq_by_gsi(gsi); if (irq != -1) { printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n", @@ -1349,14 +1359,26 @@ void xen_callback_vector(void) {} void __init xen_init_IRQ(void) { - int i; + int i, rc; + struct physdev_nr_pirqs op_nr_pirqs; cpu_evtchn_mask_p = kcalloc(nr_cpu_ids, sizeof(struct cpu_evtchn_s), GFP_KERNEL); irq_info = kcalloc(nr_irqs, sizeof(*irq_info), GFP_KERNEL); - pirq_to_irq = kcalloc(nr_irqs, sizeof(*pirq_to_irq), GFP_KERNEL); - for (i = 0; i < nr_irqs; i++) + rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_nr_pirqs, &op_nr_pirqs); + if (rc < 0) { + nr_pirqs = nr_irqs; + if (rc != -ENOSYS) + printk(KERN_WARNING "PHYSDEVOP_get_nr_pirqs returned rc=%d\n", rc); + } else { + if (xen_pv_domain() && !xen_initial_domain()) + nr_pirqs = max((int)op_nr_pirqs.nr_pirqs, nr_irqs); + else + nr_pirqs = op_nr_pirqs.nr_pirqs; + } + pirq_to_irq = kcalloc(nr_pirqs, sizeof(*pirq_to_irq), GFP_KERNEL); + for (i = 0; i < nr_pirqs; i++) pirq_to_irq[i] = -1; evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index cd69391..fbb5883 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -121,6 +121,12 @@ struct physdev_op { } u; }; +#define PHYSDEVOP_get_nr_pirqs 22 +struct physdev_nr_pirqs { + /* OUT */ + uint32_t nr_pirqs; +}; + /* * Notify that some PIRQ-bound event channels have been unmasked. * ** This command is obsolete since interface version 0x00030202 and is ** -- cgit v0.10.2 From 42a1de56f35a9c87932f45439dc1b09c8da0cc95 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 24 Jun 2010 16:42:04 +0100 Subject: xen: implement xen_hvm_register_pirq xen_hvm_register_pirq allows the kernel to map a GSI into a Xen pirq and receive the interrupt as an event channel from that point on. Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 4e37106..08e3cdc 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -17,6 +17,44 @@ #include #include +#ifdef CONFIG_ACPI +static int xen_hvm_register_pirq(u32 gsi, int triggering) +{ + int rc, irq; + struct physdev_map_pirq map_irq; + int shareable = 0; + char *name; + + if (!xen_hvm_domain()) + return -1; + + map_irq.domid = DOMID_SELF; + map_irq.type = MAP_PIRQ_TYPE_GSI; + map_irq.index = gsi; + map_irq.pirq = -1; + + rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); + if (rc) { + printk(KERN_WARNING "xen map irq failed %d\n", rc); + return -1; + } + + if (triggering == ACPI_EDGE_SENSITIVE) { + shareable = 0; + name = "ioapic-edge"; + } else { + shareable = 1; + name = "ioapic-level"; + } + + irq = xen_map_pirq_gsi(map_irq.pirq, gsi, shareable, name); + + printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); + + return irq; +} +#endif + #if defined(CONFIG_PCI_MSI) #include diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 07e56e5..239b011 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -75,7 +76,8 @@ enum xen_irq_type { * event channel - irq->event channel mapping * cpu - cpu this event channel is bound to * index - type-specific information: - * PIRQ - vector, with MSB being "needs EIO" + * PIRQ - vector, with MSB being "needs EIO", or physical IRQ of the HVM + * guest, or GSI (real passthrough IRQ) of the device. * VIRQ - virq number * IPI - IPI vector * EVTCHN - diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index fbb5883..69a72b9 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -106,6 +106,36 @@ struct physdev_irq { uint32_t vector; }; +#define MAP_PIRQ_TYPE_MSI 0x0 +#define MAP_PIRQ_TYPE_GSI 0x1 +#define MAP_PIRQ_TYPE_UNKNOWN 0x2 + +#define PHYSDEVOP_map_pirq 13 +struct physdev_map_pirq { + domid_t domid; + /* IN */ + int type; + /* IN */ + int index; + /* IN or OUT */ + int pirq; + /* IN */ + int bus; + /* IN */ + int devfn; + /* IN */ + int entry_nr; + /* IN */ + uint64_t table_base; +}; + +#define PHYSDEVOP_unmap_pirq 14 +struct physdev_unmap_pirq { + domid_t domid; + /* IN */ + int pirq; +}; + /* * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op() * hypercall since 0x00030202. -- cgit v0.10.2 From 2f065aef17b8d50a51a72451d03c7d7304249fb5 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 24 Jun 2010 16:59:16 +0100 Subject: acpi: use indirect call to register gsi in different modes Rather than using a tree of conditionals, use function pointer for acpi_register_gsi. Signed-off-by: Jeremy Fitzhardinge Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Rafael J. Wysocki diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c05872a..031f0c2 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -513,35 +513,61 @@ int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) return 0; } -/* - * success: return IRQ number (>=0) - * failure: return < 0 - */ -int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) +static int acpi_register_gsi_pic(struct device *dev, u32 gsi, + int trigger, int polarity) { - unsigned int irq; - unsigned int plat_gsi = gsi; - #ifdef CONFIG_PCI /* * Make sure all (legacy) PCI IRQs are set as level-triggered. */ - if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { - if (trigger == ACPI_LEVEL_SENSITIVE) - eisa_set_level_irq(gsi); - } + if (trigger == ACPI_LEVEL_SENSITIVE) + eisa_set_level_irq(gsi); #endif + return gsi; +} + +static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, + int trigger, int polarity) +{ #ifdef CONFIG_X86_IO_APIC - if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) { - plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); - } + gsi = mp_register_gsi(dev, gsi, trigger, polarity); #endif + + return gsi; +} + +static int (*__acpi_register_gsi)(struct device *dev, u32 gsi, int trigger, int polarity) = acpi_register_gsi_pic; + +/* + * success: return IRQ number (>=0) + * failure: return < 0 + */ +int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) +{ + unsigned int irq; + unsigned int plat_gsi = gsi; + + plat_gsi = (*__acpi_register_gsi)(dev, gsi, trigger, polarity); irq = gsi_to_irq(plat_gsi); return irq; } +void __init acpi_set_irq_model_pic(void) +{ + acpi_irq_model = ACPI_IRQ_MODEL_PIC; + __acpi_register_gsi = acpi_register_gsi_pic; + acpi_ioapic = 0; +} + +void __init acpi_set_irq_model_ioapic(void) +{ + acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; + __acpi_register_gsi = acpi_register_gsi_ioapic; + acpi_ioapic = 1; +} + /* * ACPI based hotplug support for CPU */ @@ -1259,8 +1285,7 @@ static void __init acpi_process_madt(void) */ error = acpi_parse_madt_ioapic_entries(); if (!error) { - acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; - acpi_ioapic = 1; + acpi_set_irq_model_ioapic(); smp_found_config = 1; } -- cgit v0.10.2 From 90f6881e6430ea7b38b9e0f9837719b1935616e0 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 24 Jun 2010 17:05:41 +0100 Subject: xen: add xen hvm acpi_register_gsi variant Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Rafael J. Wysocki diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 92091de..55d106b 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -93,6 +93,9 @@ extern u8 acpi_sci_flags; extern int acpi_sci_override_gsi; void acpi_pic_sci_set_trigger(unsigned int, u16); +extern int (*__acpi_register_gsi)(struct device *dev, u32 gsi, + int trigger, int polarity); + static inline void disable_acpi(void) { acpi_disabled = 1; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 031f0c2..71232b9 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -537,7 +537,8 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, return gsi; } -static int (*__acpi_register_gsi)(struct device *dev, u32 gsi, int trigger, int polarity) = acpi_register_gsi_pic; +int (*__acpi_register_gsi)(struct device *dev, u32 gsi, + int trigger, int polarity) = acpi_register_gsi_pic; /* * success: return IRQ number (>=0) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 08e3cdc..3a4ab0b 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -53,6 +53,12 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering) return irq; } + +static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, + int trigger, int polarity) +{ + return xen_hvm_register_pirq(gsi, trigger); +} #endif #if defined(CONFIG_PCI_MSI) -- cgit v0.10.2 From 3942b740e5183caad47a4a3fcb37a4509ce7af83 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 24 Jun 2010 17:50:18 +0100 Subject: xen: support GSI -> pirq remapping in PV on HVM guests Disable pcifront when running on HVM: it is meant to be used with pv guests that don't have PCI bus. Use acpi_register_gsi_xen_hvm to remap GSIs into pirqs. Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index 449c82f..f89a42a 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h @@ -3,10 +3,15 @@ #if defined(CONFIG_PCI_XEN) extern int __init pci_xen_init(void); +extern int __init pci_xen_hvm_init(void); #define pci_xen 1 #else #define pci_xen 0 #define pci_xen_init (0) +static inline int pci_xen_hvm_init(void) +{ + return -1; +} #endif #if defined(CONFIG_PCI_MSI) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 3a4ab0b..d5284c4 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -14,6 +14,7 @@ #include +#include #include #include @@ -184,3 +185,18 @@ int __init pci_xen_init(void) #endif return 0; } + +int __init pci_xen_hvm_init(void) +{ + if (!xen_feature(XENFEAT_hvm_pirqs)) + return 0; + +#ifdef CONFIG_ACPI + /* + * We don't want to change the actual ACPI delivery model, + * just how GSIs get registered. + */ + __acpi_register_gsi = acpi_register_gsi_xen_hvm; +#endif + return 0; +} diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 239b011..32269bc 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -623,7 +623,8 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name) /* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore * we are using the !xen_initial_domain() to drop in the function.*/ - if (identity_mapped_irq(gsi) || !xen_initial_domain()) { + if (identity_mapped_irq(gsi) || (!xen_initial_domain() && + xen_pv_domain())) { irq = gsi; irq_alloc_desc_at(irq, 0); } else @@ -1397,6 +1398,9 @@ void __init xen_init_IRQ(void) if (xen_hvm_domain()) { xen_callback_vector(); native_init_IRQ(); + /* pci_xen_hvm_init must be called after native_init_IRQ so that + * __acpi_register_gsi can point at the right function */ + pci_xen_hvm_init(); } else { irq_ctx_init(smp_processor_id()); } diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h index 70d2563..b6ca39a 100644 --- a/include/xen/interface/features.h +++ b/include/xen/interface/features.h @@ -47,6 +47,9 @@ /* x86: pvclock algorithm is safe to use on HVM */ #define XENFEAT_hvm_safe_pvclock 9 +/* x86: pirq can be used by HVM guests */ +#define XENFEAT_hvm_pirqs 10 + #define XENFEAT_NR_SUBMAPS 1 #endif /* __XEN_PUBLIC_FEATURES_H__ */ -- cgit v0.10.2 From 809f9267bbaba7765cdb86a47f2e6e4bf4951b69 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 1 Jul 2010 17:10:39 +0100 Subject: xen: map MSIs into pirqs Map MSIs into pirqs, writing 0 in the MSI vector data field and the pirq number in the MSI destination id field. Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index d5284c4..b5bd642 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -64,10 +64,62 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, #if defined(CONFIG_PCI_MSI) #include +#include struct xen_pci_frontend_ops *xen_pci_frontend; EXPORT_SYMBOL_GPL(xen_pci_frontend); +static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, + struct msi_msg *msg) +{ + /* We set vector == 0 to tell the hypervisor we don't care about it, + * but we want a pirq setup instead. + * We use the dest_id field to pass the pirq that we want. */ + msg->address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(pirq); + msg->address_lo = + MSI_ADDR_BASE_LO | + MSI_ADDR_DEST_MODE_PHYSICAL | + MSI_ADDR_REDIRECTION_CPU | + MSI_ADDR_DEST_ID(pirq); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + /* delivery mode reserved */ + (3 << 8) | + MSI_DATA_VECTOR(0); +} + +static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int irq, pirq, ret = 0; + struct msi_desc *msidesc; + struct msi_msg msg; + + list_for_each_entry(msidesc, &dev->msi_list, list) { + xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? + "msi-x" : "msi", &irq, &pirq); + if (irq < 0 || pirq < 0) + goto error; + printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq); + xen_msi_compose_msg(dev, pirq, &msg); + ret = set_irq_msi(irq, msidesc); + if (ret < 0) + goto error_while; + write_msi_msg(irq, &msg); + } + return 0; + +error_while: + unbind_from_irqhandler(irq, NULL); +error: + if (ret == -ENODEV) + dev_err(&dev->dev, "Xen PCI frontend has not registered" \ + " MSI/MSI-X support!\n"); + + return ret; +} + /* * For MSI interrupts we have to use drivers/xen/event.s functions to * allocate an irq_desc and setup the right */ @@ -198,5 +250,10 @@ int __init pci_xen_hvm_init(void) */ __acpi_register_gsi = acpi_register_gsi_xen_hvm; #endif + +#ifdef CONFIG_PCI_MSI + x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs; + x86_msi.teardown_msi_irq = xen_teardown_msi_irq; +#endif return 0; } diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 32269bc..efa683e 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -656,6 +656,28 @@ out: return irq; } +void xen_allocate_pirq_msi(char *name, int *irq, int *pirq) +{ + spin_lock(&irq_mapping_update_lock); + + *irq = find_unbound_irq(); + if (*irq == -1) + goto out; + + *pirq = find_unbound_pirq(); + if (*pirq == -1) + goto out; + + set_irq_chip_and_handler_name(*irq, &xen_pirq_chip, + handle_level_irq, name); + + irq_info[*irq] = mk_pirq_info(0, *pirq, 0, 0); + pirq_to_irq[*pirq] = *irq; + +out: + spin_unlock(&irq_mapping_update_lock); +} + int xen_destroy_irq(int irq) { struct irq_desc *desc; diff --git a/include/xen/events.h b/include/xen/events.h index deec8fa..0c58db6 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -72,6 +72,8 @@ void xen_hvm_evtchn_do_upcall(void); * usual. */ int xen_allocate_pirq(unsigned gsi, int shareable, char *name); int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name); +/* Allocate an irq and a pirq to be used with MSIs. */ +void xen_allocate_pirq_msi(char *name, int *irq, int *pirq); /* De-allocates the above mentioned physical interrupt. */ int xen_destroy_irq(int irq); -- cgit v0.10.2 From 6b0661a5e6fbfb159b78a39c0476905aa9b575fe Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 2 Sep 2010 15:47:32 +0100 Subject: xen: introduce XEN_DOM0 as a silent option Add XEN_DOM0 to arch/x86/xen/Kconfig as a silent compile time option that gets enabled when xen and basic x86, acpi and pci support are selected. Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 68128a1..a234b9a 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -13,6 +13,16 @@ config XEN kernel to boot in a paravirtualized environment under the Xen hypervisor. +config XEN_DOM0 + def_bool y + depends on XEN && PCI_XEN && SWIOTLB_XEN + depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI + +# Dummy symbol since people have come to rely on the PRIVILEGED_GUEST +# name in tools. +config XEN_PRIVILEGED_GUEST + def_bool XEN_DOM0 + config XEN_PVHVM def_bool y depends on XEN -- cgit v0.10.2 From 38aa66fcb79e0a46c24bba96b6f2b851a6ec2037 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 2 Sep 2010 14:51:39 +0100 Subject: xen: remap GSIs as pirqs when running as initial domain Implement xen_register_gsi to setup the correct triggering and polarity properties of a gsi. Implement xen_register_pirq to register a particular gsi as pirq and receive interrupts as events. Call xen_setup_pirqs to register all the legacy ISA irqs as pirqs. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index f89a42a..2329b3e 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h @@ -13,6 +13,13 @@ static inline int pci_xen_hvm_init(void) return -1; } #endif +#if defined(CONFIG_XEN_DOM0) +void __init xen_setup_pirqs(void); +#else +static inline void __init xen_setup_pirqs(void) +{ +} +#endif #if defined(CONFIG_PCI_MSI) #if defined(CONFIG_PCI_XEN) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index b5bd642..dd0b5fd 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -257,3 +257,138 @@ int __init pci_xen_hvm_init(void) #endif return 0; } + +#ifdef CONFIG_XEN_DOM0 +static int xen_register_pirq(u32 gsi, int triggering) +{ + int rc, irq; + struct physdev_map_pirq map_irq; + int shareable = 0; + char *name; + + if (!xen_pv_domain()) + return -1; + + if (triggering == ACPI_EDGE_SENSITIVE) { + shareable = 0; + name = "ioapic-edge"; + } else { + shareable = 1; + name = "ioapic-level"; + } + + irq = xen_allocate_pirq(gsi, shareable, name); + + printk(KERN_DEBUG "xen: --> irq=%d\n", irq); + + if (irq < 0) + goto out; + + map_irq.domid = DOMID_SELF; + map_irq.type = MAP_PIRQ_TYPE_GSI; + map_irq.index = gsi; + map_irq.pirq = irq; + + rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); + if (rc) { + printk(KERN_WARNING "xen map irq failed %d\n", rc); + return -1; + } + +out: + return irq; +} + +static int xen_register_gsi(u32 gsi, int triggering, int polarity) +{ + int rc, irq; + struct physdev_setup_gsi setup_gsi; + + if (!xen_pv_domain()) + return -1; + + printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n", + gsi, triggering, polarity); + + irq = xen_register_pirq(gsi, triggering); + + setup_gsi.gsi = gsi; + setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1); + setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1); + + rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi); + if (rc == -EEXIST) + printk(KERN_INFO "Already setup the GSI :%d\n", gsi); + else if (rc) { + printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n", + gsi, rc); + } + + return irq; +} + +static __init void xen_setup_acpi_sci(void) +{ + int rc; + int trigger, polarity; + int gsi = acpi_sci_override_gsi; + + if (!gsi) + return; + + rc = acpi_get_override_irq(gsi, &trigger, &polarity); + if (rc) { + printk(KERN_WARNING "xen: acpi_get_override_irq failed for acpi" + " sci, rc=%d\n", rc); + return; + } + trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; + polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; + + printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d " + "polarity=%d\n", gsi, trigger, polarity); + + gsi = xen_register_gsi(gsi, trigger, polarity); + printk(KERN_INFO "xen: acpi sci %d\n", gsi); + + return; +} + +static int acpi_register_gsi_xen(struct device *dev, u32 gsi, + int trigger, int polarity) +{ + return xen_register_gsi(gsi, trigger, polarity); +} + +static int __init pci_xen_initial_domain(void) +{ + xen_setup_acpi_sci(); + __acpi_register_gsi = acpi_register_gsi_xen; + + return 0; +} + +void __init xen_setup_pirqs(void) +{ + int irq; + + pci_xen_initial_domain(); + + if (0 == nr_ioapics) { + for (irq = 0; irq < NR_IRQS_LEGACY; irq++) + xen_allocate_pirq(irq, 0, "xt-pic"); + return; + } + + /* Pre-allocate legacy irqs */ + for (irq = 0; irq < NR_IRQS_LEGACY; irq++) { + int trigger, polarity; + + if (acpi_get_override_irq(irq, &trigger, &polarity) == -1) + continue; + + xen_register_pirq(irq, + trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE); + } +} +#endif diff --git a/drivers/xen/events.c b/drivers/xen/events.c index efa683e..c649ac0 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -681,6 +681,8 @@ out: int xen_destroy_irq(int irq) { struct irq_desc *desc; + struct physdev_unmap_pirq unmap_irq; + struct irq_info *info = info_for_irq(irq); int rc = -ENOENT; spin_lock(&irq_mapping_update_lock); @@ -689,6 +691,15 @@ int xen_destroy_irq(int irq) if (!desc) goto out; + if (xen_initial_domain()) { + unmap_irq.pirq = info->u.pirq.gsi; + unmap_irq.domid = DOMID_SELF; + rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq); + if (rc) { + printk(KERN_WARNING "unmap irq failed %d\n", rc); + goto out; + } + } irq_info[irq] = mk_unbound_info(); irq_free_desc(irq); @@ -1425,5 +1436,7 @@ void __init xen_init_IRQ(void) pci_xen_hvm_init(); } else { irq_ctx_init(smp_processor_id()); + if (xen_initial_domain()) + xen_setup_pirqs(); } } diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index 69a72b9..a85d76c 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -151,6 +151,16 @@ struct physdev_op { } u; }; +#define PHYSDEVOP_setup_gsi 21 +struct physdev_setup_gsi { + int gsi; + /* IN */ + uint8_t triggering; + /* IN */ + uint8_t polarity; + /* IN */ +}; + #define PHYSDEVOP_get_nr_pirqs 22 struct physdev_nr_pirqs { /* OUT */ -- cgit v0.10.2 From f731e3ef02b4744f4d7ca2f63539b900e47db31f Mon Sep 17 00:00:00 2001 From: Qing He Date: Mon, 11 Oct 2010 15:30:09 +0100 Subject: xen: remap MSIs into pirqs when running as initial domain Implement xen_create_msi_irq to create an msi and remap it as pirq. Use xen_create_msi_irq to implement an initial domain specific version of setup_msi_irqs. Signed-off-by: Qing He Signed-off-by: Yunhong Jiang Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index dd0b5fd..b3f4b30 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -135,14 +135,12 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) if (!v) return -ENOMEM; - if (!xen_initial_domain()) { - if (type == PCI_CAP_ID_MSIX) - ret = xen_pci_frontend_enable_msix(dev, &v, nvec); - else - ret = xen_pci_frontend_enable_msi(dev, &v); - if (ret) - goto error; - } + if (type == PCI_CAP_ID_MSIX) + ret = xen_pci_frontend_enable_msix(dev, &v, nvec); + else + ret = xen_pci_frontend_enable_msi(dev, &v); + if (ret) + goto error; i = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { irq = xen_allocate_pirq(v[i], 0, /* not sharable */ @@ -172,23 +170,40 @@ error: static void xen_teardown_msi_irqs(struct pci_dev *dev) { - /* Only do this when were are in non-privileged mode.*/ - if (!xen_initial_domain()) { - struct msi_desc *msidesc; - - msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); - if (msidesc->msi_attrib.is_msix) - xen_pci_frontend_disable_msix(dev); - else - xen_pci_frontend_disable_msi(dev); - } + struct msi_desc *msidesc; + msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); + if (msidesc->msi_attrib.is_msix) + xen_pci_frontend_disable_msix(dev); + else + xen_pci_frontend_disable_msi(dev); } static void xen_teardown_msi_irq(unsigned int irq) { xen_destroy_irq(irq); } + +static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int irq, ret; + struct msi_desc *msidesc; + + list_for_each_entry(msidesc, &dev->msi_list, list) { + irq = xen_create_msi_irq(dev, msidesc, type); + if (irq < 0) + return -1; + + ret = set_irq_msi(irq, msidesc); + if (ret) + goto error; + } + return 0; + +error: + xen_destroy_irq(irq); + return ret; +} #endif static int xen_pcifront_enable_irq(struct pci_dev *dev) @@ -362,6 +377,10 @@ static int acpi_register_gsi_xen(struct device *dev, u32 gsi, static int __init pci_xen_initial_domain(void) { +#ifdef CONFIG_PCI_MSI + x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs; + x86_msi.teardown_msi_irq = xen_teardown_msi_irq; +#endif xen_setup_acpi_sci(); __acpi_register_gsi = acpi_register_gsi_xen; diff --git a/drivers/xen/events.c b/drivers/xen/events.c index c649ac0..a7d9555 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -656,6 +657,10 @@ out: return irq; } +#ifdef CONFIG_PCI_MSI +#include +#include "../pci/msi.h" + void xen_allocate_pirq_msi(char *name, int *irq, int *pirq) { spin_lock(&irq_mapping_update_lock); @@ -678,6 +683,61 @@ out: spin_unlock(&irq_mapping_update_lock); } +int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type) +{ + int irq = -1; + struct physdev_map_pirq map_irq; + int rc; + int pos; + u32 table_offset, bir; + + memset(&map_irq, 0, sizeof(map_irq)); + map_irq.domid = DOMID_SELF; + map_irq.type = MAP_PIRQ_TYPE_MSI; + map_irq.index = -1; + map_irq.pirq = -1; + map_irq.bus = dev->bus->number; + map_irq.devfn = dev->devfn; + + if (type == PCI_CAP_ID_MSIX) { + pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); + + pci_read_config_dword(dev, msix_table_offset_reg(pos), + &table_offset); + bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK); + + map_irq.table_base = pci_resource_start(dev, bir); + map_irq.entry_nr = msidesc->msi_attrib.entry_nr; + } + + spin_lock(&irq_mapping_update_lock); + + irq = find_unbound_irq(); + + if (irq == -1) + goto out; + + rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); + if (rc) { + printk(KERN_WARNING "xen map irq failed %d\n", rc); + + irq_free_desc(irq); + + irq = -1; + goto out; + } + irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index); + + set_irq_chip_and_handler_name(irq, &xen_pirq_chip, + handle_level_irq, + (type == PCI_CAP_ID_MSIX) ? "msi-x":"msi"); + +out: + spin_unlock(&irq_mapping_update_lock); + return irq; +} +#endif + int xen_destroy_irq(int irq) { struct irq_desc *desc; diff --git a/include/xen/events.h b/include/xen/events.h index 0c58db6..8fa27dc 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -72,8 +72,12 @@ void xen_hvm_evtchn_do_upcall(void); * usual. */ int xen_allocate_pirq(unsigned gsi, int shareable, char *name); int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name); + +#ifdef CONFIG_PCI_MSI /* Allocate an irq and a pirq to be used with MSIs. */ void xen_allocate_pirq_msi(char *name, int *irq, int *pirq); +int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type); +#endif /* De-allocates the above mentioned physical interrupt. */ int xen_destroy_irq(int irq); -- cgit v0.10.2 From 98511f3532eb7fce274f37d94f29790922799e15 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 3 Sep 2010 14:55:16 +0100 Subject: xen: map a dummy page for local apic and ioapic in xen_set_fixmap Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 42086ac..ffc5e24 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1861,6 +1861,8 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, } #endif /* CONFIG_X86_64 */ +static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss; + static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) { pte_t pte; @@ -1881,15 +1883,28 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #else case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: #endif -#ifdef CONFIG_X86_LOCAL_APIC - case FIX_APIC_BASE: /* maps dummy local APIC */ -#endif case FIX_TEXT_POKE0: case FIX_TEXT_POKE1: /* All local page mappings */ pte = pfn_pte(phys, prot); break; +#ifdef CONFIG_X86_LOCAL_APIC + case FIX_APIC_BASE: /* maps dummy local APIC */ + pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL); + break; +#endif + +#ifdef CONFIG_X86_IO_APIC + case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END: + /* + * We just don't map the IO APIC - all access is via + * hypercalls. Keep the address in the pte for reference. + */ + pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL); + break; +#endif + case FIX_PARAVIRT_BOOTMAP: /* This is an MFN, but it isn't an IO mapping from the IO domain */ @@ -2027,6 +2042,8 @@ void __init xen_init_mmu_ops(void) pv_mmu_ops = xen_mmu_ops; vmap_lazy_unmap = false; + + memset(dummy_mapping, 0xff, PAGE_SIZE); } /* Protected by xen_reservation_lock. */ -- cgit v0.10.2 From 801fd14a725ef7757d33f07b83415cdd2165e50a Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 23 Sep 2010 12:06:25 +0100 Subject: xen: use vcpu_ops to setup cpu masks Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 25f232b..1386767 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -156,11 +156,16 @@ static void __init xen_fill_possible_map(void) { int i, rc; + num_processors = 0; + disabled_cpus = 0; for (i = 0; i < nr_cpu_ids; i++) { rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); if (rc >= 0) { num_processors++; set_cpu_possible(i, true); + } else { + set_cpu_possible(i, false); + set_cpu_present(i, false); } } } @@ -190,6 +195,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) if (xen_smp_intr_init(0)) BUG(); + xen_fill_possible_map(); + if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL)) panic("could not allocate xen_cpu_initialized_map\n"); @@ -480,6 +487,5 @@ static const struct smp_ops xen_smp_ops __initdata = { void __init xen_smp_init(void) { smp_ops = xen_smp_ops; - xen_fill_possible_map(); xen_init_spinlocks(); } -- cgit v0.10.2 From b37a56d6f3c0595d8d65ddd5b7610d11735c4978 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Thu, 2 Sep 2010 14:53:56 +0100 Subject: xen: Initialize xenbus for dom0. Do initial xenbus/xenstore setup in dom0. In dom0 we need to actually allocate the xenstore resources, rather than being given them from outside. [ Impact: initialize Xenbus ] Signed-off-by: Juan Quintela Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index d409495..d242610 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -801,6 +801,7 @@ device_initcall(xenbus_probe_initcall); static int __init xenbus_init(void) { int err = 0; + unsigned long page = 0; DPRINTK(""); @@ -821,7 +822,31 @@ static int __init xenbus_init(void) * Domain0 doesn't have a store_evtchn or store_mfn yet. */ if (xen_initial_domain()) { - /* dom0 not yet supported */ + struct evtchn_alloc_unbound alloc_unbound; + + /* Allocate Xenstore page */ + page = get_zeroed_page(GFP_KERNEL); + if (!page) + goto out_error; + + xen_store_mfn = xen_start_info->store_mfn = + pfn_to_mfn(virt_to_phys((void *)page) >> + PAGE_SHIFT); + + /* Next allocate a local port which xenstored can bind to */ + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = 0; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (err == -ENOSYS) + goto out_error; + + BUG_ON(err); + xen_store_evtchn = xen_start_info->store_evtchn = + alloc_unbound.port; + + xen_store_interface = mfn_to_virt(xen_store_mfn); } else { if (xen_hvm_domain()) { uint64_t v = 0; @@ -867,6 +892,8 @@ static int __init xenbus_init(void) bus_unregister(&xenbus_frontend.bus); out_error: + if (page != 0) + free_page(page); return err; } -- cgit v0.10.2 From 4ec5387cc36c6472a2ff2c82e9865abe8cab96c2 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Thu, 2 Sep 2010 15:45:43 +0100 Subject: xen: add the direct mapping area for ISA bus access add the direct mapping area for ISA bus access when running as initial domain Signed-off-by: Juan Quintela Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 1ccfa1b..9efb004 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1186,6 +1186,7 @@ asmlinkage void __init xen_start_kernel(void) xen_raw_console_write("mapping kernel into physical memory\n"); pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); + xen_ident_map_ISA(); init_mm.pgd = pgd; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index ffc5e24..eed9c7c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1682,6 +1682,7 @@ static void *m2v(phys_addr_t maddr) return __ka(m2p(maddr)); } +/* Set the page permissions on an identity-mapped pages */ static void set_page_prot(void *addr, pgprot_t prot) { unsigned long pfn = __pa(addr) >> PAGE_SHIFT; @@ -1929,6 +1930,29 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #endif } +__init void xen_ident_map_ISA(void) +{ + unsigned long pa; + + /* + * If we're dom0, then linear map the ISA machine addresses into + * the kernel's address space. + */ + if (!xen_initial_domain()) + return; + + xen_raw_printk("Xen: setup ISA identity maps\n"); + + for (pa = ISA_START_ADDRESS; pa < ISA_END_ADDRESS; pa += PAGE_SIZE) { + pte_t pte = mfn_pte(PFN_DOWN(pa), PAGE_KERNEL_IO); + + if (HYPERVISOR_update_va_mapping(PAGE_OFFSET + pa, pte, 0)) + BUG(); + } + + xen_flush_tlb(); +} + static __init void xen_post_allocator_init(void) { pv_mmu_ops.set_pte = xen_set_pte; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index c413132..62ceb78 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -119,6 +119,9 @@ char * __init xen_memory_setup(void) * Even though this is normal, usable memory under Xen, reserve * ISA memory anyway because too many things think they can poke * about in there. + * + * In a dom0 kernel, this region is identity mapped with the + * hardware ISA area, so it really is out of bounds. */ e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, E820_RESERVED); -- cgit v0.10.2 From 4fe7d5a708a955b35e3fdc4dea3e0b7a6ae2eb06 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 2 Sep 2010 16:17:06 +0100 Subject: xen: make hvc_xen console work for dom0. Use the console hypercalls for dom0 console. [ Impact: Add Xen dom0 console ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Juan Quintela Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index 60446f8..1f7e13a 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c @@ -78,7 +78,7 @@ static int __write_console(const char *data, int len) return sent; } -static int write_console(uint32_t vtermno, const char *data, int len) +static int domU_write_console(uint32_t vtermno, const char *data, int len) { int ret = len; @@ -101,7 +101,7 @@ static int write_console(uint32_t vtermno, const char *data, int len) return ret; } -static int read_console(uint32_t vtermno, char *buf, int len) +static int domU_read_console(uint32_t vtermno, char *buf, int len) { struct xencons_interface *intf = xencons_interface(); XENCONS_RING_IDX cons, prod; @@ -122,28 +122,62 @@ static int read_console(uint32_t vtermno, char *buf, int len) return recv; } -static const struct hv_ops hvc_ops = { - .get_chars = read_console, - .put_chars = write_console, +static struct hv_ops domU_hvc_ops = { + .get_chars = domU_read_console, + .put_chars = domU_write_console, .notifier_add = notifier_add_irq, .notifier_del = notifier_del_irq, .notifier_hangup = notifier_hangup_irq, }; -static int __init xen_init(void) +static int dom0_read_console(uint32_t vtermno, char *buf, int len) +{ + return HYPERVISOR_console_io(CONSOLEIO_read, len, buf); +} + +/* + * Either for a dom0 to write to the system console, or a domU with a + * debug version of Xen + */ +static int dom0_write_console(uint32_t vtermno, const char *str, int len) +{ + int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str); + if (rc < 0) + return 0; + + return len; +} + +static struct hv_ops dom0_hvc_ops = { + .get_chars = dom0_read_console, + .put_chars = dom0_write_console, + .notifier_add = notifier_add_irq, + .notifier_del = notifier_del_irq, + .notifier_hangup = notifier_hangup_irq, +}; + +static int __init xen_hvc_init(void) { struct hvc_struct *hp; + struct hv_ops *ops; - if (!xen_pv_domain() || - xen_initial_domain() || - !xen_start_info->console.domU.evtchn) + if (!xen_pv_domain()) return -ENODEV; - xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn); + if (xen_initial_domain()) { + ops = &dom0_hvc_ops; + xencons_irq = bind_virq_to_irq(VIRQ_CONSOLE, 0); + } else { + if (!xen_start_info->console.domU.evtchn) + return -ENODEV; + + ops = &domU_hvc_ops; + xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn); + } if (xencons_irq < 0) xencons_irq = 0; /* NO_IRQ */ - hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256); + hp = hvc_alloc(HVC_COOKIE, xencons_irq, ops, 256); if (IS_ERR(hp)) return PTR_ERR(hp); @@ -160,7 +194,7 @@ void xen_console_resume(void) rebind_evtchn_irq(xen_start_info->console.domU.evtchn, xencons_irq); } -static void __exit xen_fini(void) +static void __exit xen_hvc_fini(void) { if (hvc) hvc_remove(hvc); @@ -168,29 +202,24 @@ static void __exit xen_fini(void) static int xen_cons_init(void) { + struct hv_ops *ops; + if (!xen_pv_domain()) return 0; - hvc_instantiate(HVC_COOKIE, 0, &hvc_ops); + if (xen_initial_domain()) + ops = &dom0_hvc_ops; + else + ops = &domU_hvc_ops; + + hvc_instantiate(HVC_COOKIE, 0, ops); return 0; } -module_init(xen_init); -module_exit(xen_fini); +module_init(xen_hvc_init); +module_exit(xen_hvc_fini); console_initcall(xen_cons_init); -static void raw_console_write(const char *str, int len) -{ - while(len > 0) { - int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str); - if (rc <= 0) - break; - - str += rc; - len -= rc; - } -} - #ifdef CONFIG_EARLY_PRINTK static void xenboot_write_console(struct console *console, const char *string, unsigned len) @@ -198,19 +227,22 @@ static void xenboot_write_console(struct console *console, const char *string, unsigned int linelen, off = 0; const char *pos; - raw_console_write(string, len); + dom0_write_console(0, string, len); + + if (xen_initial_domain()) + return; - write_console(0, "(early) ", 8); + domU_write_console(0, "(early) ", 8); while (off < len && NULL != (pos = strchr(string+off, '\n'))) { linelen = pos-string+off; if (off + linelen > len) break; - write_console(0, string+off, linelen); - write_console(0, "\r\n", 2); + domU_write_console(0, string+off, linelen); + domU_write_console(0, "\r\n", 2); off += linelen + 1; } if (off < len) - write_console(0, string+off, len-off); + domU_write_console(0, string+off, len-off); } struct console xenboot_console = { @@ -222,7 +254,7 @@ struct console xenboot_console = { void xen_raw_console_write(const char *str) { - raw_console_write(str, strlen(str)); + dom0_write_console(0, str, strlen(str)); } void xen_raw_printk(const char *fmt, ...) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index a7d9555..93e98ff 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -839,7 +839,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) } -static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) +int bind_virq_to_irq(unsigned int virq, unsigned int cpu) { struct evtchn_bind_virq bind_virq; int evtchn, irq; diff --git a/include/xen/events.h b/include/xen/events.h index 8fa27dc..646dd17 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -12,6 +12,7 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id); +int bind_virq_to_irq(unsigned int virq, unsigned int cpu); int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, irq_handler_t handler, unsigned long irqflags, const char *devname, -- cgit v0.10.2 From ff12849a7a187e17fcbd888b39850d22103395c6 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 28 Sep 2010 16:45:51 +0100 Subject: xen: mask the MTRR feature from the cpuid We don't want Linux to think that the cpu supports MTRRs when running under Xen because MTRR operations could only be performed through hypercalls. Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 9efb004..d48a32b 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -243,6 +243,7 @@ static __init void xen_init_cpuid_mask(void) cpuid_leaf1_edx_mask = ~((1 << X86_FEATURE_MCE) | /* disable MCE */ (1 << X86_FEATURE_MCA) | /* disable MCA */ + (1 << X86_FEATURE_MTRR) | /* disable MTRR */ (1 << X86_FEATURE_ACC)); /* thermal monitoring */ if (!xen_initial_domain()) -- cgit v0.10.2 From 0e058e527784a9a23f7ed7a73ffafebb53a889da Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 21 Oct 2010 17:40:08 +0100 Subject: xen: add a missing #include to arch/x86/pci/xen.c Add missing #include to arch/x86/pci/xen.c. Signed-off-by: Stefano Stabellini diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index b3f4b30..117f5b8 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -10,6 +10,7 @@ #include #include +#include #include #include -- cgit v0.10.2 From ea5b8f73933e34d2b47a65284c46d26d49e7edb9 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 26 Oct 2010 17:28:33 +0100 Subject: xen: initialize cpu masks for pv guests in xen_smp_init Pv guests don't have ACPI and need the cpu masks to be set correctly as early as possible so we call xen_fill_possible_map from xen_smp_init. On the other hand the initial domain supports ACPI so in this case we skip xen_fill_possible_map and rely on it. However Xen might limit the number of cpus usable by the domain, so we filter those masks during smp initialization using the VCPUOP_is_up hypercall. It is important that the filtering is done before xen_setup_vcpu_info_placement. Signed-off-by: Stefano Stabellini diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 1386767..834dfeb 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -156,6 +157,25 @@ static void __init xen_fill_possible_map(void) { int i, rc; + if (xen_initial_domain()) + return; + + for (i = 0; i < nr_cpu_ids; i++) { + rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); + if (rc >= 0) { + num_processors++; + set_cpu_possible(i, true); + } + } +} + +static void __init xen_filter_cpu_maps(void) +{ + int i, rc; + + if (!xen_initial_domain()) + return; + num_processors = 0; disabled_cpus = 0; for (i = 0; i < nr_cpu_ids; i++) { @@ -179,6 +199,7 @@ static void __init xen_smp_prepare_boot_cpu(void) old memory can be recycled */ make_lowmem_page_readwrite(xen_initial_gdt); + xen_filter_cpu_maps(); xen_setup_vcpu_info_placement(); } @@ -195,8 +216,6 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) if (xen_smp_intr_init(0)) BUG(); - xen_fill_possible_map(); - if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL)) panic("could not allocate xen_cpu_initialized_map\n"); @@ -487,5 +506,6 @@ static const struct smp_ops xen_smp_ops __initdata = { void __init xen_smp_init(void) { smp_ops = xen_smp_ops; + xen_fill_possible_map(); xen_init_spinlocks(); } -- cgit v0.10.2 From e28c31a96b1570f17731b18e8efabb7308d0c22c Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Wed, 27 Oct 2010 17:55:04 +0100 Subject: xen: register xen pci notifier Register a pci notifier to add (or remove) pci devices to Xen via hypercalls. Xen needs to know the pci devices present in the system to handle pci passthrough and even MSI remapping in the initial domain. Signed-off-by: Weidong Han Signed-off-by: Qing He Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Stefano Stabellini diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index b978645..eb8a78d 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -13,3 +13,4 @@ obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o +obj-$(CONFIG_XEN_DOM0) += pci.o diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c new file mode 100644 index 0000000..cef4bafc0 --- /dev/null +++ b/drivers/xen/pci.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2009, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Weidong Han + */ + +#include +#include +#include +#include + +#include +#include +#include "../pci/pci.h" + +static int xen_add_device(struct device *dev) +{ + int r; + struct pci_dev *pci_dev = to_pci_dev(dev); + +#ifdef CONFIG_PCI_IOV + if (pci_dev->is_virtfn) { + struct physdev_manage_pci_ext manage_pci_ext = { + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn, + .is_virtfn = 1, + .physfn.bus = pci_dev->physfn->bus->number, + .physfn.devfn = pci_dev->physfn->devfn, + }; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, + &manage_pci_ext); + } else +#endif + if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) { + struct physdev_manage_pci_ext manage_pci_ext = { + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn, + .is_extfn = 1, + }; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, + &manage_pci_ext); + } else { + struct physdev_manage_pci manage_pci = { + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn, + }; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, + &manage_pci); + } + + return r; +} + +static int xen_remove_device(struct device *dev) +{ + int r; + struct pci_dev *pci_dev = to_pci_dev(dev); + struct physdev_manage_pci manage_pci; + + manage_pci.bus = pci_dev->bus->number; + manage_pci.devfn = pci_dev->devfn; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove, + &manage_pci); + + return r; +} + +static int xen_pci_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + int r = 0; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + r = xen_add_device(dev); + break; + case BUS_NOTIFY_DEL_DEVICE: + r = xen_remove_device(dev); + break; + default: + break; + } + + return r; +} + +struct notifier_block device_nb = { + .notifier_call = xen_pci_notifier, +}; + +static int __init register_xen_pci_notifier(void) +{ + if (!xen_initial_domain()) + return 0; + + return bus_register_notifier(&pci_bus_type, &device_nb); +} + +arch_initcall(register_xen_pci_notifier); diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index a85d76c..2b2c66c 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -136,6 +136,27 @@ struct physdev_unmap_pirq { int pirq; }; +#define PHYSDEVOP_manage_pci_add 15 +#define PHYSDEVOP_manage_pci_remove 16 +struct physdev_manage_pci { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; + +#define PHYSDEVOP_manage_pci_add_ext 20 +struct physdev_manage_pci_ext { + /* IN */ + uint8_t bus; + uint8_t devfn; + unsigned is_extfn; + unsigned is_virtfn; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; +}; + /* * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op() * hypercall since 0x00030202. -- cgit v0.10.2