From e3f0f36ddf1b2743a0d4ea312996536a9c37e1c7 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 18 Jan 2013 12:58:47 -0500 Subject: x86/apic: Remove noisy zero-mask warning from default_send_IPI_mask_logical() Since circa 3.5, we've had dozens of reports of people hitting this warning. Forwarded reports have been met with silence, so just remove the warning if no-one cares. Example reports: https://bugzilla.redhat.com/show_bug.cgi?id=797687 https://bugzilla.redhat.com/show_bug.cgi?id=867174 https://bugzilla.redhat.com/show_bug.cgi?id=894865 Signed-off-by: Dave Jones Cc: Andrew Morton Link: http://lkml.kernel.org/r/20130118175847.GA27662@redhat.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index cce91bf..7434d85 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -106,7 +106,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) unsigned long mask = cpumask_bits(cpumask)[0]; unsigned long flags; - if (WARN_ONCE(!mask, "empty IPI mask")) + if (!mask) return; local_irq_save(flags); -- cgit v0.10.2 From 4cca6ea04d31c22a7d0436949c072b27bde41f86 Mon Sep 17 00:00:00 2001 From: Alok N Kataria Date: Thu, 17 Jan 2013 15:44:42 -0800 Subject: x86/apic: Allow x2apic without IR on VMware platform This patch updates x2apic initializaition code to allow x2apic on VMware platform even without interrupt remapping support. The hypervisor_x2apic_available hook was added in x2apic initialization code and used by KVM and XEN, before this. I have also cleaned up that code to export this hook through the hypervisor_x86 structure. Compile tested for KVM and XEN configs, this patch doesn't have any functional effect on those two platforms. On VMware platform, verified that x2apic is used in physical mode on products that support this. Signed-off-by: Alok N Kataria Reviewed-by: Doug Covelli Reviewed-by: Dan Hecht Acked-by: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Jeremy Fitzhardinge Cc: Avi Kivity Link: http://lkml.kernel.org/r/1358466282.423.60.camel@akataria-dtop.eng.vmware.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index b518c75..86095ed 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -25,6 +25,7 @@ extern void init_hypervisor(struct cpuinfo_x86 *c); extern void init_hypervisor_platform(void); +extern bool hypervisor_x2apic_available(void); /* * x86 hypervisor information @@ -41,6 +42,9 @@ struct hypervisor_x86 { /* Platform setup (run once per boot) */ void (*init_platform)(void); + + /* X2APIC detection (run once per boot) */ + bool (*x2apic_available)(void); }; extern const struct hypervisor_x86 *x86_hyper; @@ -51,13 +55,4 @@ extern const struct hypervisor_x86 x86_hyper_ms_hyperv; extern const struct hypervisor_x86 x86_hyper_xen_hvm; extern const struct hypervisor_x86 x86_hyper_kvm; -static inline bool hypervisor_x2apic_available(void) -{ - if (kvm_para_available()) - return true; - if (xen_x2apic_para_available()) - return true; - return false; -} - #endif diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index a8f8fa9..1e7e84a 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -79,3 +79,10 @@ void __init init_hypervisor_platform(void) if (x86_hyper->init_platform) x86_hyper->init_platform(); } + +bool __init hypervisor_x2apic_available(void) +{ + return x86_hyper && + x86_hyper->x2apic_available && + x86_hyper->x2apic_available(); +} diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index d22d0c4..03a3632 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -33,6 +33,9 @@ #define VMWARE_PORT_CMD_GETVERSION 10 #define VMWARE_PORT_CMD_GETHZ 45 +#define VMWARE_PORT_CMD_GETVCPU_INFO 68 +#define VMWARE_PORT_CMD_LEGACY_X2APIC 3 +#define VMWARE_PORT_CMD_VCPU_RESERVED 31 #define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ __asm__("inl (%%dx)" : \ @@ -125,10 +128,20 @@ static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); } +/* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */ +static bool __init vmware_legacy_x2apic_available(void) +{ + uint32_t eax, ebx, ecx, edx; + VMWARE_PORT(GETVCPU_INFO, eax, ebx, ecx, edx); + return (eax & (1 << VMWARE_PORT_CMD_VCPU_RESERVED)) == 0 && + (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0; +} + const __refconst struct hypervisor_x86 x86_hyper_vmware = { .name = "VMware", .detect = vmware_platform, .set_cpu_features = vmware_set_cpu_features, .init_platform = vmware_platform_setup, + .x2apic_available = vmware_legacy_x2apic_available, }; EXPORT_SYMBOL(x86_hyper_vmware); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 9c2bd8b..2b44ea5 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -505,6 +505,7 @@ static bool __init kvm_detect(void) const struct hypervisor_x86 x86_hyper_kvm __refconst = { .name = "KVM", .detect = kvm_detect, + .x2apic_available = kvm_para_available, }; EXPORT_SYMBOL_GPL(x86_hyper_kvm); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 138e566..8b4c56d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1668,6 +1668,7 @@ const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = { .name = "Xen HVM", .detect = xen_hvm_platform, .init_platform = xen_hvm_guest_init, + .x2apic_available = xen_x2apic_para_available, }; EXPORT_SYMBOL(x86_hyper_xen_hvm); #endif -- cgit v0.10.2 From 51906e779f2b13b38f8153774c4c7163d412ffd9 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 19 Nov 2012 16:01:29 +0100 Subject: x86/MSI: Support multiple MSIs in presense of IRQ remapping The MSI specification has several constraints in comparison with MSI-X, most notable of them is the inability to configure MSIs independently. As a result, it is impossible to dispatch interrupts from different queues to different CPUs. This is largely devalues the support of multiple MSIs in SMP systems. Also, a necessity to allocate a contiguous block of vector numbers for devices capable of multiple MSIs might cause a considerable pressure on x86 interrupt vector allocator and could lead to fragmentation of the interrupt vectors space. This patch overcomes both drawbacks in presense of IRQ remapping and lets devices take advantage of multiple queues and per-IRQ affinity assignments. Signed-off-by: Alexander Gordeev Cc: Bjorn Helgaas Cc: Suresh Siddha Cc: Yinghai Lu Cc: Matthew Wilcox Cc: Jeff Garzik Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/c8bd86ff56b5fc118257436768aaa04489ac0a4c.1353324359.git.agordeev@redhat.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index b739d39..2016f9d 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -300,9 +300,9 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) return cfg; } -static int alloc_irq_from(unsigned int from, int node) +static int alloc_irqs_from(unsigned int from, unsigned int count, int node) { - return irq_alloc_desc_from(from, node); + return irq_alloc_descs_from(from, count, node); } static void free_irq_at(unsigned int at, struct irq_cfg *cfg) @@ -2982,37 +2982,58 @@ device_initcall(ioapic_init_ops); /* * Dynamic irq allocate and deallocation */ -unsigned int create_irq_nr(unsigned int from, int node) +unsigned int __create_irqs(unsigned int from, unsigned int count, int node) { - struct irq_cfg *cfg; + struct irq_cfg **cfg; unsigned long flags; - unsigned int ret = 0; - int irq; + int irq, i; if (from < nr_irqs_gsi) from = nr_irqs_gsi; - irq = alloc_irq_from(from, node); - if (irq < 0) - return 0; - cfg = alloc_irq_cfg(irq, node); - if (!cfg) { - free_irq_at(irq, NULL); + cfg = kzalloc_node(count * sizeof(cfg[0]), GFP_KERNEL, node); + if (!cfg) return 0; + + irq = alloc_irqs_from(from, count, node); + if (irq < 0) + goto out_cfgs; + + for (i = 0; i < count; i++) { + cfg[i] = alloc_irq_cfg(irq + i, node); + if (!cfg[i]) + goto out_irqs; } raw_spin_lock_irqsave(&vector_lock, flags); - if (!__assign_irq_vector(irq, cfg, apic->target_cpus())) - ret = irq; + for (i = 0; i < count; i++) + if (__assign_irq_vector(irq + i, cfg[i], apic->target_cpus())) + goto out_vecs; raw_spin_unlock_irqrestore(&vector_lock, flags); - if (ret) { - irq_set_chip_data(irq, cfg); - irq_clear_status_flags(irq, IRQ_NOREQUEST); - } else { - free_irq_at(irq, cfg); + for (i = 0; i < count; i++) { + irq_set_chip_data(irq + i, cfg[i]); + irq_clear_status_flags(irq + i, IRQ_NOREQUEST); } - return ret; + + kfree(cfg); + return irq; + +out_vecs: + for (i--; i >= 0; i--) + __clear_irq_vector(irq + i, cfg[i]); + raw_spin_unlock_irqrestore(&vector_lock, flags); +out_irqs: + for (i = 0; i < count; i++) + free_irq_at(irq + i, cfg[i]); +out_cfgs: + kfree(cfg); + return 0; +} + +unsigned int create_irq_nr(unsigned int from, int node) +{ + return __create_irqs(from, 1, node); } int create_irq(void) @@ -3045,6 +3066,14 @@ void destroy_irq(unsigned int irq) free_irq_at(irq, cfg); } +static inline void destroy_irqs(unsigned int irq, unsigned int count) +{ + unsigned int i; + + for (i = 0; i < count; i++) + destroy_irq(irq + i); +} + /* * MSI message composition */ @@ -3071,7 +3100,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, if (irq_remapped(cfg)) { compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id); - return err; + return 0; } if (x2apic_enabled()) @@ -3098,7 +3127,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, MSI_DATA_DELIVERY_LOWPRI) | MSI_DATA_VECTOR(cfg->vector); - return err; + return 0; } static int @@ -3136,18 +3165,26 @@ static struct irq_chip msi_chip = { .irq_retrigger = ioapic_retrigger_irq, }; -static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, + unsigned int irq_base, unsigned int irq_offset) { struct irq_chip *chip = &msi_chip; struct msi_msg msg; + unsigned int irq = irq_base + irq_offset; int ret; ret = msi_compose_msg(dev, irq, &msg, -1); if (ret < 0) return ret; - irq_set_msi_desc(irq, msidesc); - write_msi_msg(irq, &msg); + irq_set_msi_desc_off(irq_base, irq_offset, msidesc); + + /* + * MSI-X message is written per-IRQ, the offset is always 0. + * MSI message denotes a contiguous group of IRQs, written for 0th IRQ. + */ + if (!irq_offset) + write_msi_msg(irq, &msg); if (irq_remapped(irq_get_chip_data(irq))) { irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); @@ -3161,23 +3198,19 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) return 0; } -int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +int setup_msix_irqs(struct pci_dev *dev, int nvec) { int node, ret, sub_handle, index = 0; unsigned int irq, irq_want; struct msi_desc *msidesc; - /* x86 doesn't support multiple MSI yet */ - if (type == PCI_CAP_ID_MSI && nvec > 1) - return 1; - node = dev_to_node(&dev->dev); irq_want = nr_irqs_gsi; sub_handle = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { irq = create_irq_nr(irq_want, node); if (irq == 0) - return -1; + return -ENOSPC; irq_want = irq + 1; if (!irq_remapping_enabled) goto no_ir; @@ -3199,7 +3232,7 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) goto error; } no_ir: - ret = setup_msi_irq(dev, msidesc, irq); + ret = setup_msi_irq(dev, msidesc, irq, 0); if (ret < 0) goto error; sub_handle++; @@ -3211,6 +3244,74 @@ error: return ret; } +int setup_msi_irqs(struct pci_dev *dev, int nvec) +{ + int node, ret, sub_handle, index = 0; + unsigned int irq; + struct msi_desc *msidesc; + + if (nvec > 1 && !irq_remapping_enabled) + return 1; + + nvec = __roundup_pow_of_two(nvec); + + WARN_ON(!list_is_singular(&dev->msi_list)); + msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); + WARN_ON(msidesc->irq); + WARN_ON(msidesc->msi_attrib.multiple); + + node = dev_to_node(&dev->dev); + irq = __create_irqs(nr_irqs_gsi, nvec, node); + if (irq == 0) + return -ENOSPC; + + if (!irq_remapping_enabled) { + ret = setup_msi_irq(dev, msidesc, irq, 0); + if (ret < 0) + goto error; + return 0; + } + + msidesc->msi_attrib.multiple = ilog2(nvec); + for (sub_handle = 0; sub_handle < nvec; sub_handle++) { + if (!sub_handle) { + index = msi_alloc_remapped_irq(dev, irq, nvec); + if (index < 0) { + ret = index; + goto error; + } + } else { + ret = msi_setup_remapped_irq(dev, irq + sub_handle, + index, sub_handle); + if (ret < 0) + goto error; + } + ret = setup_msi_irq(dev, msidesc, irq, sub_handle); + if (ret < 0) + goto error; + } + return 0; + +error: + destroy_irqs(irq, nvec); + + /* + * Restore altered MSI descriptor fields and prevent just destroyed + * IRQs from tearing down again in default_teardown_msi_irqs() + */ + msidesc->irq = 0; + msidesc->msi_attrib.multiple = 0; + + return ret; +} + +int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + if (type == PCI_CAP_ID_MSI) + return setup_msi_irqs(dev, nvec); + return setup_msix_irqs(dev, nvec); +} + void native_teardown_msi_irq(unsigned int irq) { destroy_irq(irq); diff --git a/include/linux/irq.h b/include/linux/irq.h index fdf2c4a..1eab991 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -528,6 +528,8 @@ extern int irq_set_handler_data(unsigned int irq, void *data); extern int irq_set_chip_data(unsigned int irq, void *data); extern int irq_set_irq_type(unsigned int irq, unsigned int type); extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry); +extern int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, + struct msi_desc *entry); extern struct irq_data *irq_get_irq_data(unsigned int irq); static inline struct irq_chip *irq_get_chip(unsigned int irq) @@ -590,6 +592,9 @@ int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, #define irq_alloc_desc_from(from, node) \ irq_alloc_descs(-1, from, 1, node) +#define irq_alloc_descs_from(from, cnt, node) \ + irq_alloc_descs(-1, from, cnt, node) + void irq_free_descs(unsigned int irq, unsigned int cnt); int irq_reserve_irqs(unsigned int from, unsigned int cnt); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 3aca9f2..cbd97ce 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -90,27 +90,41 @@ int irq_set_handler_data(unsigned int irq, void *data) EXPORT_SYMBOL(irq_set_handler_data); /** - * irq_set_msi_desc - set MSI descriptor data for an irq - * @irq: Interrupt number - * @entry: Pointer to MSI descriptor data + * irq_set_msi_desc_off - set MSI descriptor data for an irq at offset + * @irq_base: Interrupt number base + * @irq_offset: Interrupt number offset + * @entry: Pointer to MSI descriptor data * - * Set the MSI descriptor entry for an irq + * Set the MSI descriptor entry for an irq at offset */ -int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry) +int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, + struct msi_desc *entry) { unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); + struct irq_desc *desc = irq_get_desc_lock(irq_base + irq_offset, &flags, IRQ_GET_DESC_CHECK_GLOBAL); if (!desc) return -EINVAL; desc->irq_data.msi_desc = entry; - if (entry) - entry->irq = irq; + if (entry && !irq_offset) + entry->irq = irq_base; irq_put_desc_unlock(desc, flags); return 0; } /** + * irq_set_msi_desc - set MSI descriptor data for an irq + * @irq: Interrupt number + * @entry: Pointer to MSI descriptor data + * + * Set the MSI descriptor entry for an irq + */ +int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry) +{ + return irq_set_msi_desc_off(irq, 0, entry); +} + +/** * irq_set_chip_data - set irq chip data for an irq * @irq: Interrupt number * @data: Pointer to chip specific data -- cgit v0.10.2 From 08261d87f7d1b6253ab3223756625a5c74532293 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 19 Nov 2012 16:02:10 +0100 Subject: PCI/MSI: Enable multiple MSIs with pci_enable_msi_block_auto() The new function pci_enable_msi_block_auto() tries to allocate maximum possible number of MSIs up to the number the device supports. It generalizes a pattern when pci_enable_msi_block() is contiguously called until it succeeds or fails. Opposite to pci_enable_msi_block() which takes the number of MSIs to allocate as a input parameter, pci_enable_msi_block_auto() could be used by device drivers to obtain the number of assigned MSIs and the number of MSIs the device supports. Signed-off-by: Alexander Gordeev Acked-by: Bjorn Helgaas Cc: Suresh Siddha Cc: Yinghai Lu Cc: Matthew Wilcox Cc: Jeff Garzik Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/c3de2419df94a0f95ca1a6f755afc421486455e6.1353324359.git.agordeev@redhat.com Signed-off-by: Ingo Molnar diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 53e6fca..a091780 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -127,15 +127,42 @@ on the number of vectors that can be allocated; pci_enable_msi_block() returns as soon as it finds any constraint that doesn't allow the call to succeed. -4.2.3 pci_disable_msi +4.2.3 pci_enable_msi_block_auto + +int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *count) + +This variation on pci_enable_msi() call allows a device driver to request +the maximum possible number of MSIs. The MSI specification only allows +interrupts to be allocated in powers of two, up to a maximum of 2^5 (32). + +If this function returns a positive number, it indicates that it has +succeeded and the returned value is the number of allocated interrupts. In +this case, the function enables MSI on this device and updates dev->irq to +be the lowest of the new interrupts assigned to it. The other interrupts +assigned to the device are in the range dev->irq to dev->irq + returned +value - 1. + +If this function returns a negative number, it indicates an error and +the driver should not attempt to request any more MSI interrupts for +this device. + +If the device driver needs to know the number of interrupts the device +supports it can pass the pointer count where that number is stored. The +device driver must decide what action to take if pci_enable_msi_block_auto() +succeeds, but returns a value less than the number of interrupts supported. +If the device driver does not need to know the number of interrupts +supported, it can set the pointer count to NULL. + +4.2.4 pci_disable_msi void pci_disable_msi(struct pci_dev *dev) This function should be used to undo the effect of pci_enable_msi() or -pci_enable_msi_block(). Calling it restores dev->irq to the pin-based -interrupt number and frees the previously allocated message signaled -interrupt(s). The interrupt may subsequently be assigned to another -device, so drivers should not cache the value of dev->irq. +pci_enable_msi_block() or pci_enable_msi_block_auto(). Calling it restores +dev->irq to the pin-based interrupt number and frees the previously +allocated message signaled interrupt(s). The interrupt may subsequently be +assigned to another device, so drivers should not cache the value of +dev->irq. Before calling this function, a device driver must always call free_irq() on any interrupt for which it previously called request_irq(). diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 5099636..00cc78c7 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -845,6 +845,32 @@ int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec) } EXPORT_SYMBOL(pci_enable_msi_block); +int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec) +{ + int ret, pos, nvec; + u16 msgctl; + + pos = pci_find_capability(dev, PCI_CAP_ID_MSI); + if (!pos) + return -EINVAL; + + pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); + ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1); + + if (maxvec) + *maxvec = ret; + + do { + nvec = ret; + ret = pci_enable_msi_block(dev, nvec); + } while (ret > 0); + + if (ret < 0) + return ret; + return nvec; +} +EXPORT_SYMBOL(pci_enable_msi_block_auto); + void pci_msi_shutdown(struct pci_dev *dev) { struct msi_desc *desc; diff --git a/include/linux/pci.h b/include/linux/pci.h index 15472d6..6fa4dd2 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1101,6 +1101,12 @@ static inline int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec) return -1; } +static inline int +pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec) +{ + return -1; +} + static inline void pci_msi_shutdown(struct pci_dev *dev) { } static inline void pci_disable_msi(struct pci_dev *dev) @@ -1132,6 +1138,7 @@ static inline int pci_msi_enabled(void) } #else extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec); +extern int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec); extern void pci_msi_shutdown(struct pci_dev *dev); extern void pci_disable_msi(struct pci_dev *dev); extern int pci_msix_table_size(struct pci_dev *dev); -- cgit v0.10.2 From 5ca72c4f7c412c2002363218901eba5516c476b1 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 19 Nov 2012 16:02:48 +0100 Subject: AHCI: Support multiple MSIs Take advantage of multiple MSIs implementation on x86 - on systems with IRQ remapping AHCI ports not only get assigned separate MSI vectors - but also separate IRQs. As result, interrupts generated by different ports could be serviced on different CPUs rather than on a single one. In cases when number of allocated MSIs is less than requested the Sharing Last MSI mode does not get used, no matter implemented in hardware or not. Instead, the driver assumes the advantage of multiple MSIs is negated and falls back to the single MSI mode as if MRSM bit was set (some Intel chips implement this strategy anyway - MRSM bit gets set even if the number of allocated MSIs exceeds the number of implemented ports). Signed-off-by: Alexander Gordeev Acked-by: Jeff Garzik Cc: Bjorn Helgaas Cc: Suresh Siddha Cc: Yinghai Lu Cc: Matthew Wilcox Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/15bf7ee314dd55f21ec7d2a01c47613cd8190a7c.1353324359.git.agordeev@redhat.com Signed-off-by: Ingo Molnar diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 4979127..495aeed 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1061,6 +1061,86 @@ static inline void ahci_gtf_filter_workaround(struct ata_host *host) {} #endif +int ahci_init_interrupts(struct pci_dev *pdev, struct ahci_host_priv *hpriv) +{ + int rc; + unsigned int maxvec; + + if (!(hpriv->flags & AHCI_HFLAG_NO_MSI)) { + rc = pci_enable_msi_block_auto(pdev, &maxvec); + if (rc > 0) { + if ((rc == maxvec) || (rc == 1)) + return rc; + /* + * Assume that advantage of multipe MSIs is negated, + * so fallback to single MSI mode to save resources + */ + pci_disable_msi(pdev); + if (!pci_enable_msi(pdev)) + return 1; + } + } + + pci_intx(pdev, 1); + return 0; +} + +/** + * ahci_host_activate - start AHCI host, request IRQs and register it + * @host: target ATA host + * @irq: base IRQ number to request + * @n_msis: number of MSIs allocated for this host + * @irq_handler: irq_handler used when requesting IRQs + * @irq_flags: irq_flags used when requesting IRQs + * + * Similar to ata_host_activate, but requests IRQs according to AHCI-1.1 + * when multiple MSIs were allocated. That is one MSI per port, starting + * from @irq. + * + * LOCKING: + * Inherited from calling layer (may sleep). + * + * RETURNS: + * 0 on success, -errno otherwise. + */ +int ahci_host_activate(struct ata_host *host, int irq, unsigned int n_msis) +{ + int i, rc; + + /* Sharing Last Message among several ports is not supported */ + if (n_msis < host->n_ports) + return -EINVAL; + + rc = ata_host_start(host); + if (rc) + return rc; + + for (i = 0; i < host->n_ports; i++) { + rc = devm_request_threaded_irq(host->dev, + irq + i, ahci_hw_interrupt, ahci_thread_fn, IRQF_SHARED, + dev_driver_string(host->dev), host->ports[i]); + if (rc) + goto out_free_irqs; + } + + for (i = 0; i < host->n_ports; i++) + ata_port_desc(host->ports[i], "irq %d", irq + i); + + rc = ata_host_register(host, &ahci_sht); + if (rc) + goto out_free_all_irqs; + + return 0; + +out_free_all_irqs: + i = host->n_ports; +out_free_irqs: + for (i--; i >= 0; i--) + devm_free_irq(host->dev, irq + i, host->ports[i]); + + return rc; +} + static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { unsigned int board_id = ent->driver_data; @@ -1069,7 +1149,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) struct device *dev = &pdev->dev; struct ahci_host_priv *hpriv; struct ata_host *host; - int n_ports, i, rc; + int n_ports, n_msis, i, rc; int ahci_pci_bar = AHCI_PCI_BAR_STANDARD; VPRINTK("ENTER\n"); @@ -1156,11 +1236,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (ahci_sb600_enable_64bit(pdev)) hpriv->flags &= ~AHCI_HFLAG_32BIT_ONLY; - if ((hpriv->flags & AHCI_HFLAG_NO_MSI) || pci_enable_msi(pdev)) - pci_intx(pdev, 1); - hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar]; + n_msis = ahci_init_interrupts(pdev, hpriv); + if (n_msis > 1) + hpriv->flags |= AHCI_HFLAG_MULTI_MSI; + /* save initial config */ ahci_pci_save_initial_config(pdev, hpriv); @@ -1256,6 +1337,10 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ahci_pci_print_info(host); pci_set_master(pdev); + + if (hpriv->flags & AHCI_HFLAG_MULTI_MSI) + return ahci_host_activate(host, pdev->irq, n_msis); + return ata_host_activate(host, pdev->irq, ahci_interrupt, IRQF_SHARED, &ahci_sht); } diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 9be4712..b830e6c 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -231,6 +231,7 @@ enum { AHCI_HFLAG_DELAY_ENGINE = (1 << 15), /* do not start engine on port start (wait until error-handling stage) */ + AHCI_HFLAG_MULTI_MSI = (1 << 16), /* multiple PCI MSIs */ /* ap->flags bits */ @@ -297,6 +298,8 @@ struct ahci_port_priv { unsigned int ncq_saw_d2h:1; unsigned int ncq_saw_dmas:1; unsigned int ncq_saw_sdb:1; + u32 intr_status; /* interrupts to handle */ + spinlock_t lock; /* protects parent ata_port */ u32 intr_mask; /* interrupts to enable */ bool fbs_supported; /* set iff FBS is supported */ bool fbs_enabled; /* set iff FBS is enabled */ @@ -359,7 +362,10 @@ void ahci_set_em_messages(struct ahci_host_priv *hpriv, struct ata_port_info *pi); int ahci_reset_em(struct ata_host *host); irqreturn_t ahci_interrupt(int irq, void *dev_instance); +irqreturn_t ahci_hw_interrupt(int irq, void *dev_instance); +irqreturn_t ahci_thread_fn(int irq, void *dev_instance); void ahci_print_info(struct ata_host *host, const char *scc_s); +int ahci_host_activate(struct ata_host *host, int irq, unsigned int n_msis); static inline void __iomem *__ahci_port_base(struct ata_host *host, unsigned int port_no) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 6cd7805..34c8216 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1655,19 +1655,16 @@ static void ahci_error_intr(struct ata_port *ap, u32 irq_stat) ata_port_abort(ap); } -static void ahci_port_intr(struct ata_port *ap) +static void ahci_handle_port_interrupt(struct ata_port *ap, + void __iomem *port_mmio, u32 status) { - void __iomem *port_mmio = ahci_port_base(ap); struct ata_eh_info *ehi = &ap->link.eh_info; struct ahci_port_priv *pp = ap->private_data; struct ahci_host_priv *hpriv = ap->host->private_data; int resetting = !!(ap->pflags & ATA_PFLAG_RESETTING); - u32 status, qc_active = 0; + u32 qc_active = 0; int rc; - status = readl(port_mmio + PORT_IRQ_STAT); - writel(status, port_mmio + PORT_IRQ_STAT); - /* ignore BAD_PMP while resetting */ if (unlikely(resetting)) status &= ~PORT_IRQ_BAD_PMP; @@ -1743,6 +1740,107 @@ static void ahci_port_intr(struct ata_port *ap) } } +void ahci_port_intr(struct ata_port *ap) +{ + void __iomem *port_mmio = ahci_port_base(ap); + u32 status; + + status = readl(port_mmio + PORT_IRQ_STAT); + writel(status, port_mmio + PORT_IRQ_STAT); + + ahci_handle_port_interrupt(ap, port_mmio, status); +} + +irqreturn_t ahci_thread_fn(int irq, void *dev_instance) +{ + struct ata_port *ap = dev_instance; + struct ahci_port_priv *pp = ap->private_data; + void __iomem *port_mmio = ahci_port_base(ap); + unsigned long flags; + u32 status; + + spin_lock_irqsave(&ap->host->lock, flags); + status = pp->intr_status; + if (status) + pp->intr_status = 0; + spin_unlock_irqrestore(&ap->host->lock, flags); + + spin_lock_bh(ap->lock); + ahci_handle_port_interrupt(ap, port_mmio, status); + spin_unlock_bh(ap->lock); + + return IRQ_HANDLED; +} +EXPORT_SYMBOL_GPL(ahci_thread_fn); + +void ahci_hw_port_interrupt(struct ata_port *ap) +{ + void __iomem *port_mmio = ahci_port_base(ap); + struct ahci_port_priv *pp = ap->private_data; + u32 status; + + status = readl(port_mmio + PORT_IRQ_STAT); + writel(status, port_mmio + PORT_IRQ_STAT); + + pp->intr_status |= status; +} + +irqreturn_t ahci_hw_interrupt(int irq, void *dev_instance) +{ + struct ata_port *ap_this = dev_instance; + struct ahci_port_priv *pp = ap_this->private_data; + struct ata_host *host = ap_this->host; + struct ahci_host_priv *hpriv = host->private_data; + void __iomem *mmio = hpriv->mmio; + unsigned int i; + u32 irq_stat, irq_masked; + + VPRINTK("ENTER\n"); + + spin_lock(&host->lock); + + irq_stat = readl(mmio + HOST_IRQ_STAT); + + if (!irq_stat) { + u32 status = pp->intr_status; + + spin_unlock(&host->lock); + + VPRINTK("EXIT\n"); + + return status ? IRQ_WAKE_THREAD : IRQ_NONE; + } + + irq_masked = irq_stat & hpriv->port_map; + + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap; + + if (!(irq_masked & (1 << i))) + continue; + + ap = host->ports[i]; + if (ap) { + ahci_hw_port_interrupt(ap); + VPRINTK("port %u\n", i); + } else { + VPRINTK("port %u (no irq)\n", i); + if (ata_ratelimit()) + dev_warn(host->dev, + "interrupt on disabled port %u\n", i); + } + } + + writel(irq_stat, mmio + HOST_IRQ_STAT); + + spin_unlock(&host->lock); + + VPRINTK("EXIT\n"); + + return IRQ_WAKE_THREAD; +} +EXPORT_SYMBOL_GPL(ahci_hw_interrupt); + irqreturn_t ahci_interrupt(int irq, void *dev_instance) { struct ata_host *host = dev_instance; @@ -2196,6 +2294,14 @@ static int ahci_port_start(struct ata_port *ap) */ pp->intr_mask = DEF_PORT_IRQ; + /* + * Switch to per-port locking in case each port has its own MSI vector. + */ + if ((hpriv->flags & AHCI_HFLAG_MULTI_MSI)) { + spin_lock_init(&pp->lock); + ap->lock = &pp->lock; + } + ap->private_data = pp; /* engage engines, captain */ -- cgit v0.10.2 From 70733e0c7ed22177e2cfe660fa2a0e90f1f39126 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:33 +0200 Subject: x86, apic: Move irq_remapping_enabled checks into IRQ-remapping code Move the three easy to move checks in the x86' apic.c file into the IRQ-remapping code. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index b994cc8..8d741e6 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1477,8 +1477,7 @@ void __init bsp_end_local_APIC_setup(void) * Now that local APIC setup is completed for BP, configure the fault * handling for interrupt remapping. */ - if (irq_remapping_enabled) - irq_remap_enable_fault_handling(); + irq_remap_enable_fault_handling(); } @@ -2251,8 +2250,7 @@ static int lapic_suspend(void) local_irq_save(flags); disable_local_APIC(); - if (irq_remapping_enabled) - irq_remapping_disable(); + irq_remapping_disable(); local_irq_restore(flags); return 0; @@ -2320,8 +2318,7 @@ static void lapic_resume(void) apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - if (irq_remapping_enabled) - irq_remapping_reenable(x2apic_mode); + irq_remapping_reenable(x2apic_mode); local_irq_restore(flags); } diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index faf85d6..19381b9 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -87,7 +87,9 @@ int __init irq_remapping_enable(void) void irq_remapping_disable(void) { - if (!remap_ops || !remap_ops->disable) + if (!irq_remapping_enabled || + !remap_ops || + !remap_ops->disable) return; remap_ops->disable(); @@ -95,7 +97,9 @@ void irq_remapping_disable(void) int irq_remapping_reenable(int mode) { - if (!remap_ops || !remap_ops->reenable) + if (!irq_remapping_enabled || + !remap_ops || + !remap_ops->reenable) return 0; return remap_ops->reenable(mode); @@ -103,6 +107,9 @@ int irq_remapping_reenable(int mode) int __init irq_remap_enable_fault_handling(void) { + if (!irq_remapping_enabled) + return 0; + if (!remap_ops || !remap_ops->enable_faulting) return -ENODEV; -- cgit v0.10.2 From 336224ba5e4fb42a95d02ab0aa0fdff21649bb38 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:34 +0200 Subject: x86, apic: Mask IO-APIC and PIC unconditionally on LAPIC resume IO-APIC and PIC use the same resume routines when IRQ remapping is enabled or disabled. So it should be safe to mask the other APICs for the IRQ-remapping-disabled case too. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 8d741e6..a5b4dce 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2266,16 +2266,15 @@ static void lapic_resume(void) return; local_irq_save(flags); - if (irq_remapping_enabled) { - /* - * IO-APIC and PIC have their own resume routines. - * We just mask them here to make sure the interrupt - * subsystem is completely quiet while we enable x2apic - * and interrupt-remapping. - */ - mask_ioapic_entries(); - legacy_pic->mask_all(); - } + + /* + * IO-APIC and PIC have their own resume routines. + * We just mask them here to make sure the interrupt + * subsystem is completely quiet while we enable x2apic + * and interrupt-remapping. + */ + mask_ioapic_entries(); + legacy_pic->mask_all(); if (x2apic_mode) enable_x2apic(); -- cgit v0.10.2 From 1c4248ca4e783e47cc34e313d9f82b4ea52774cc Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:35 +0200 Subject: x86, io_apic: Introduce x86_io_apic_ops.disable() This function pointer is used to call a system-specific function for disabling the IO-APIC. Currently this is used for IRQ remapping which has its own disable routine. Also introduce the necessary infrastructure in the interrupt remapping code to overwrite this and other function pointers as necessary by interrupt remapping. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 73d8c53..d59e172 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -179,6 +179,7 @@ extern void __init native_io_apic_init_mappings(void); extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg); extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val); extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val); +extern void native_disable_io_apic(void); static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) { @@ -223,6 +224,7 @@ static inline void disable_ioapic_support(void) { } #define native_io_apic_read NULL #define native_io_apic_write NULL #define native_io_apic_modify NULL +#define native_disable_io_apic NULL #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 5769349..b1d2d6a 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -190,10 +190,11 @@ struct x86_msi_ops { }; struct x86_io_apic_ops { - void (*init) (void); - unsigned int (*read) (unsigned int apic, unsigned int reg); - void (*write) (unsigned int apic, unsigned int reg, unsigned int value); - void (*modify)(unsigned int apic, unsigned int reg, unsigned int value); + void (*init) (void); + unsigned int (*read) (unsigned int apic, unsigned int reg); + void (*write) (unsigned int apic, unsigned int reg, unsigned int value); + void (*modify) (unsigned int apic, unsigned int reg, unsigned int value); + void (*disable)(void); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 2016f9d..cd5f4d7 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1921,30 +1921,14 @@ void __init enable_IO_APIC(void) clear_IO_APIC(); } -/* - * Not an __init, needed by the reboot code - */ -void disable_IO_APIC(void) +void native_disable_io_apic(void) { /* - * Clear the IO-APIC before rebooting: - */ - clear_IO_APIC(); - - if (!legacy_pic->nr_legacy_irqs) - return; - - /* * If the i8259 is routed through an IOAPIC * Put that IOAPIC in virtual wire mode * so legacy interrupts can be delivered. - * - * With interrupt-remapping, for now we will use virtual wire A mode, - * as virtual wire B is little complex (need to configure both - * IOAPIC RTE as well as interrupt-remapping table entry). - * As this gets called during crash dump, keep this simple for now. */ - if (ioapic_i8259.pin != -1 && !irq_remapping_enabled) { + if (ioapic_i8259.pin != -1) { struct IO_APIC_route_entry entry; memset(&entry, 0, sizeof(entry)); @@ -1964,12 +1948,25 @@ void disable_IO_APIC(void) ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); } + if (cpu_has_apic || apic_from_smp_config()) + disconnect_bsp_APIC(ioapic_i8259.pin != -1); + +} + +/* + * Not an __init, needed by the reboot code + */ +void disable_IO_APIC(void) +{ /* - * Use virtual wire A mode when interrupt remapping is enabled. + * Clear the IO-APIC before rebooting: */ - if (cpu_has_apic || apic_from_smp_config()) - disconnect_bsp_APIC(!irq_remapping_enabled && - ioapic_i8259.pin != -1); + clear_IO_APIC(); + + if (!legacy_pic->nr_legacy_irqs) + return; + + x86_io_apic_ops.disable(); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 7a3d075..754524a 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -118,8 +118,9 @@ struct x86_msi_ops x86_msi = { }; struct x86_io_apic_ops x86_io_apic_ops = { - .init = native_io_apic_init_mappings, - .read = native_io_apic_read, - .write = native_io_apic_write, - .modify = native_io_apic_modify, + .init = native_io_apic_init_mappings, + .read = native_io_apic_read, + .write = native_io_apic_write, + .modify = native_io_apic_modify, + .disable = native_disable_io_apic, }; diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 19381b9..db3dcaf 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -6,6 +7,9 @@ #include #include +#include +#include +#include #include "irq_remapping.h" @@ -17,6 +21,24 @@ int no_x2apic_optout; static struct irq_remap_ops *remap_ops; +static void irq_remapping_disable_io_apic(void) +{ + /* + * With interrupt-remapping, for now we will use virtual wire A + * mode, as virtual wire B is little complex (need to configure + * both IOAPIC RTE as well as interrupt-remapping table entry). + * As this gets called during crash dump, keep this simple for + * now. + */ + if (cpu_has_apic || apic_from_smp_config()) + disconnect_bsp_APIC(0); +} + +static void __init irq_remapping_modify_x86_ops(void) +{ + x86_io_apic_ops.disable = irq_remapping_disable_io_apic; +} + static __init int setup_nointremap(char *str) { disable_irq_remap = 1; @@ -79,10 +101,17 @@ int __init irq_remapping_prepare(void) int __init irq_remapping_enable(void) { + int ret; + if (!remap_ops || !remap_ops->enable) return -ENODEV; - return remap_ops->enable(); + ret = remap_ops->enable(); + + if (irq_remapping_enabled) + irq_remapping_modify_x86_ops(); + + return ret; } void irq_remapping_disable(void) -- cgit v0.10.2 From afcc8a40a090f7a65d3b72bac1a26fc6dbb63b10 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:36 +0200 Subject: x86, io_apic: Introduce x86_io_apic_ops.print_entries for debugging This call-back is used to dump IO-APIC entries for debugging purposes into the kernel log. VT-d needs a special routine for this and will overwrite the default. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index d59e172..21aa81e 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -180,6 +180,8 @@ extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg); extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val); extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val); extern void native_disable_io_apic(void); +extern void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries); +extern void intel_ir_io_apic_print_entries(unsigned int apic, unsigned int nr_entries); static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) { @@ -225,6 +227,7 @@ static inline void disable_ioapic_support(void) { } #define native_io_apic_write NULL #define native_io_apic_modify NULL #define native_disable_io_apic NULL +#define native_io_apic_print_entries NULL #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index b1d2d6a..8ff79f7 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -195,6 +195,7 @@ struct x86_io_apic_ops { void (*write) (unsigned int apic, unsigned int reg, unsigned int value); void (*modify) (unsigned int apic, unsigned int reg, unsigned int value); void (*disable)(void); + void (*print_entries)(unsigned int apic, unsigned int nr_entries); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index cd5f4d7..a18e27a 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1513,9 +1513,63 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, ioapic_write_entry(ioapic_idx, pin, entry); } -__apicdebuginit(void) print_IO_APIC(int ioapic_idx) +void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries) +{ + int i; + + pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n"); + + for (i = 0; i <= nr_entries; i++) { + struct IO_APIC_route_entry entry; + + entry = ioapic_read_entry(apic, i); + + pr_debug(" %02x %02X ", i, entry.dest); + pr_cont("%1d %1d %1d %1d %1d " + "%1d %1d %02X\n", + entry.mask, + entry.trigger, + entry.irr, + entry.polarity, + entry.delivery_status, + entry.dest_mode, + entry.delivery_mode, + entry.vector); + } +} + +void intel_ir_io_apic_print_entries(unsigned int apic, + unsigned int nr_entries) { int i; + + pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n"); + + for (i = 0; i <= nr_entries; i++) { + struct IR_IO_APIC_route_entry *ir_entry; + struct IO_APIC_route_entry entry; + + entry = ioapic_read_entry(apic, i); + + ir_entry = (struct IR_IO_APIC_route_entry *)&entry; + + pr_debug(" %02x %04X ", i, ir_entry->index); + pr_cont("%1d %1d %1d %1d %1d " + "%1d %1d %X %02X\n", + ir_entry->format, + ir_entry->mask, + ir_entry->trigger, + ir_entry->irr, + ir_entry->polarity, + ir_entry->delivery_status, + ir_entry->index2, + ir_entry->zero, + ir_entry->vector); + } +} + +__apicdebuginit(void) print_IO_APIC(int ioapic_idx) +{ union IO_APIC_reg_00 reg_00; union IO_APIC_reg_01 reg_01; union IO_APIC_reg_02 reg_02; @@ -1568,58 +1622,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) printk(KERN_DEBUG ".... IRQ redirection table:\n"); - if (irq_remapping_enabled) { - printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR" - " Pol Stat Indx2 Zero Vect:\n"); - } else { - printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" - " Stat Dmod Deli Vect:\n"); - } - - for (i = 0; i <= reg_01.bits.entries; i++) { - if (irq_remapping_enabled) { - struct IO_APIC_route_entry entry; - struct IR_IO_APIC_route_entry *ir_entry; - - entry = ioapic_read_entry(ioapic_idx, i); - ir_entry = (struct IR_IO_APIC_route_entry *) &entry; - printk(KERN_DEBUG " %02x %04X ", - i, - ir_entry->index - ); - pr_cont("%1d %1d %1d %1d %1d " - "%1d %1d %X %02X\n", - ir_entry->format, - ir_entry->mask, - ir_entry->trigger, - ir_entry->irr, - ir_entry->polarity, - ir_entry->delivery_status, - ir_entry->index2, - ir_entry->zero, - ir_entry->vector - ); - } else { - struct IO_APIC_route_entry entry; - - entry = ioapic_read_entry(ioapic_idx, i); - printk(KERN_DEBUG " %02x %02X ", - i, - entry.dest - ); - pr_cont("%1d %1d %1d %1d %1d " - "%1d %1d %02X\n", - entry.mask, - entry.trigger, - entry.irr, - entry.polarity, - entry.delivery_status, - entry.dest_mode, - entry.delivery_mode, - entry.vector - ); - } - } + x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries); } __apicdebuginit(void) print_IO_APICs(void) diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 754524a..ee4af8b 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -123,4 +123,5 @@ struct x86_io_apic_ops x86_io_apic_ops = { .write = native_io_apic_write, .modify = native_io_apic_modify, .disable = native_disable_io_apic, + .print_entries = native_io_apic_print_entries, }; diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index af8904d..cb9a72d 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -617,6 +617,14 @@ static int __init intel_enable_irq_remapping(void) goto error; irq_remapping_enabled = 1; + + /* + * VT-d has a different layout for IO-APIC entries when + * interrupt remapping is enabled. So it needs a special routine + * to print IO-APIC entries for debugging purposes too. + */ + x86_io_apic_ops.print_entries = intel_ir_io_apic_print_entries; + pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic"); return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE; -- cgit v0.10.2 From 71054d8841b442bb3d8be60bde2bfac0483c19da Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:37 +0200 Subject: x86, hpet: Introduce x86_msi_ops.setup_hpet_msi This function pointer can be overwritten by the IRQ remapping code. The irq_remapping_enabled check can be removed from default_setup_hpet_msi. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 434e210..b18df57 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -80,9 +80,9 @@ extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg); extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg); #ifdef CONFIG_PCI_MSI -extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id); +extern int default_setup_hpet_msi(unsigned int irq, unsigned int id); #else -static inline int arch_setup_hpet_msi(unsigned int irq, unsigned int id) +static inline int default_setup_hpet_msi(unsigned int irq, unsigned int id) { return -EINVAL; } @@ -111,6 +111,7 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler); static inline int hpet_enable(void) { return 0; } static inline int is_hpet_enabled(void) { return 0; } #define hpet_readl(a) 0 +#define default_setup_hpet_msi NULL #endif #endif /* _ASM_X86_HPET_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 8ff79f7..1ee10ca 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -187,6 +187,7 @@ struct x86_msi_ops { void (*teardown_msi_irq)(unsigned int irq); void (*teardown_msi_irqs)(struct pci_dev *dev); void (*restore_msi_irqs)(struct pci_dev *dev, int irq); + int (*setup_hpet_msi)(unsigned int irq, unsigned int id); }; struct x86_io_apic_ops { diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index a18e27a..e7b8763 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3399,18 +3399,12 @@ static struct irq_chip hpet_msi_type = { .irq_retrigger = ioapic_retrigger_irq, }; -int arch_setup_hpet_msi(unsigned int irq, unsigned int id) +int default_setup_hpet_msi(unsigned int irq, unsigned int id) { struct irq_chip *chip = &hpet_msi_type; struct msi_msg msg; int ret; - if (irq_remapping_enabled) { - ret = setup_hpet_msi_remapped(irq, id); - if (ret) - return ret; - } - ret = msi_compose_msg(NULL, irq, &msg, id); if (ret < 0) return ret; diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index e28670f..da85a8e 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -478,7 +478,7 @@ static int hpet_msi_next_event(unsigned long delta, static int hpet_setup_msi_irq(unsigned int irq) { - if (arch_setup_hpet_msi(irq, hpet_blockid)) { + if (x86_msi.setup_hpet_msi(irq, hpet_blockid)) { destroy_irq(irq); return -EINVAL; } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index ee4af8b..0357eee 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -111,10 +112,11 @@ struct x86_platform_ops x86_platform = { EXPORT_SYMBOL_GPL(x86_platform); struct x86_msi_ops x86_msi = { - .setup_msi_irqs = native_setup_msi_irqs, - .teardown_msi_irq = native_teardown_msi_irq, - .teardown_msi_irqs = default_teardown_msi_irqs, - .restore_msi_irqs = default_restore_msi_irqs, + .setup_msi_irqs = native_setup_msi_irqs, + .teardown_msi_irq = native_teardown_msi_irq, + .teardown_msi_irqs = default_teardown_msi_irqs, + .restore_msi_irqs = default_restore_msi_irqs, + .setup_hpet_msi = default_setup_hpet_msi, }; struct x86_io_apic_ops x86_io_apic_ops = { diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index db3dcaf..0baad3b 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -36,7 +36,8 @@ static void irq_remapping_disable_io_apic(void) static void __init irq_remapping_modify_x86_ops(void) { - x86_io_apic_ops.disable = irq_remapping_disable_io_apic; + x86_io_apic_ops.disable = irq_remapping_disable_io_apic; + x86_msi.setup_hpet_msi = setup_hpet_msi_remapped; } static __init int setup_nointremap(char *str) -- cgit v0.10.2 From 5afba62cc8a16716508605e02c1b02ee5f969184 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:38 +0200 Subject: x86, msi: Use IRQ remapping specific setup_msi_irqs routine Use seperate routines to setup MSI IRQs for both irq_remapping_enabled cases. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 5fb9bbb..0ee1e88 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -47,9 +47,6 @@ extern void free_remapped_irq(int irq); extern void compose_remapped_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id); -extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec); -extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, - int index, int sub_handle); extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id); #else /* CONFIG_IRQ_REMAP */ @@ -83,15 +80,6 @@ static inline void compose_remapped_msi_msg(struct pci_dev *pdev, struct msi_msg *msg, u8 hpet_id) { } -static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) -{ - return -ENODEV; -} -static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, - int index, int sub_handle) -{ - return -ENODEV; -} static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) { return -ENODEV; diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index dba7805..c28fd02 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -121,9 +121,12 @@ static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq) #define arch_teardown_msi_irq x86_teardown_msi_irq #define arch_restore_msi_irqs x86_restore_msi_irqs /* implemented in arch/x86/kernel/apic/io_apic. */ +struct msi_desc; int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void native_teardown_msi_irq(unsigned int irq); void native_restore_msi_irqs(struct pci_dev *dev, int irq); +int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, + unsigned int irq_base, unsigned int irq_offset); /* default to the implementation in drivers/lib/msi.c */ #define HAVE_DEFAULT_MSI_TEARDOWN_IRQS #define HAVE_DEFAULT_MSI_RESTORE_IRQS diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e7b8763..d4b045e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3066,7 +3066,7 @@ void destroy_irq(unsigned int irq) free_irq_at(irq, cfg); } -static inline void destroy_irqs(unsigned int irq, unsigned int count) +void destroy_irqs(unsigned int irq, unsigned int count) { unsigned int i; @@ -3165,8 +3165,8 @@ static struct irq_chip msi_chip = { .irq_retrigger = ioapic_retrigger_irq, }; -static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, - unsigned int irq_base, unsigned int irq_offset) +int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, + unsigned int irq_base, unsigned int irq_offset) { struct irq_chip *chip = &msi_chip; struct msi_msg msg; @@ -3198,44 +3198,28 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, return 0; } -int setup_msix_irqs(struct pci_dev *dev, int nvec) +int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - int node, ret, sub_handle, index = 0; unsigned int irq, irq_want; struct msi_desc *msidesc; + int node, ret; + + /* Multiple MSI vectors only supported with interrupt remapping */ + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; node = dev_to_node(&dev->dev); irq_want = nr_irqs_gsi; - sub_handle = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { irq = create_irq_nr(irq_want, node); if (irq == 0) return -ENOSPC; + irq_want = irq + 1; - if (!irq_remapping_enabled) - goto no_ir; - if (!sub_handle) { - /* - * allocate the consecutive block of IRTE's - * for 'nvec' - */ - index = msi_alloc_remapped_irq(dev, irq, nvec); - if (index < 0) { - ret = index; - goto error; - } - } else { - ret = msi_setup_remapped_irq(dev, irq, index, - sub_handle); - if (ret < 0) - goto error; - } -no_ir: ret = setup_msi_irq(dev, msidesc, irq, 0); if (ret < 0) goto error; - sub_handle++; } return 0; @@ -3244,74 +3228,6 @@ error: return ret; } -int setup_msi_irqs(struct pci_dev *dev, int nvec) -{ - int node, ret, sub_handle, index = 0; - unsigned int irq; - struct msi_desc *msidesc; - - if (nvec > 1 && !irq_remapping_enabled) - return 1; - - nvec = __roundup_pow_of_two(nvec); - - WARN_ON(!list_is_singular(&dev->msi_list)); - msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); - WARN_ON(msidesc->irq); - WARN_ON(msidesc->msi_attrib.multiple); - - node = dev_to_node(&dev->dev); - irq = __create_irqs(nr_irqs_gsi, nvec, node); - if (irq == 0) - return -ENOSPC; - - if (!irq_remapping_enabled) { - ret = setup_msi_irq(dev, msidesc, irq, 0); - if (ret < 0) - goto error; - return 0; - } - - msidesc->msi_attrib.multiple = ilog2(nvec); - for (sub_handle = 0; sub_handle < nvec; sub_handle++) { - if (!sub_handle) { - index = msi_alloc_remapped_irq(dev, irq, nvec); - if (index < 0) { - ret = index; - goto error; - } - } else { - ret = msi_setup_remapped_irq(dev, irq + sub_handle, - index, sub_handle); - if (ret < 0) - goto error; - } - ret = setup_msi_irq(dev, msidesc, irq, sub_handle); - if (ret < 0) - goto error; - } - return 0; - -error: - destroy_irqs(irq, nvec); - - /* - * Restore altered MSI descriptor fields and prevent just destroyed - * IRQs from tearing down again in default_teardown_msi_irqs() - */ - msidesc->irq = 0; - msidesc->msi_attrib.multiple = 0; - - return ret; -} - -int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - if (type == PCI_CAP_ID_MSI) - return setup_msi_irqs(dev, nvec); - return setup_msix_irqs(dev, nvec); -} - void native_teardown_msi_irq(unsigned int irq) { destroy_irq(irq); diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 0baad3b..20f04b6 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include @@ -21,6 +23,10 @@ int no_x2apic_optout; static struct irq_remap_ops *remap_ops; +static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec); +static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, + int index, int sub_handle); + static void irq_remapping_disable_io_apic(void) { /* @@ -34,9 +40,109 @@ static void irq_remapping_disable_io_apic(void) disconnect_bsp_APIC(0); } +static int do_setup_msi_irqs(struct pci_dev *dev, int nvec) +{ + int node, ret, sub_handle, index = 0; + unsigned int irq; + struct msi_desc *msidesc; + + nvec = __roundup_pow_of_two(nvec); + + WARN_ON(!list_is_singular(&dev->msi_list)); + msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); + WARN_ON(msidesc->irq); + WARN_ON(msidesc->msi_attrib.multiple); + + node = dev_to_node(&dev->dev); + irq = __create_irqs(get_nr_irqs_gsi(), nvec, node); + if (irq == 0) + return -ENOSPC; + + msidesc->msi_attrib.multiple = ilog2(nvec); + for (sub_handle = 0; sub_handle < nvec; sub_handle++) { + if (!sub_handle) { + index = msi_alloc_remapped_irq(dev, irq, nvec); + if (index < 0) { + ret = index; + goto error; + } + } else { + ret = msi_setup_remapped_irq(dev, irq + sub_handle, + index, sub_handle); + if (ret < 0) + goto error; + } + ret = setup_msi_irq(dev, msidesc, irq, sub_handle); + if (ret < 0) + goto error; + } + return 0; + +error: + destroy_irqs(irq, nvec); + + /* + * Restore altered MSI descriptor fields and prevent just destroyed + * IRQs from tearing down again in default_teardown_msi_irqs() + */ + msidesc->irq = 0; + msidesc->msi_attrib.multiple = 0; + + return ret; +} + +static int do_setup_msix_irqs(struct pci_dev *dev, int nvec) +{ + int node, ret, sub_handle, index = 0; + struct msi_desc *msidesc; + unsigned int irq; + + node = dev_to_node(&dev->dev); + irq = get_nr_irqs_gsi(); + sub_handle = 0; + + list_for_each_entry(msidesc, &dev->msi_list, list) { + + irq = create_irq_nr(irq, node); + if (irq == 0) + return -1; + + if (sub_handle == 0) + ret = index = msi_alloc_remapped_irq(dev, irq, nvec); + else + ret = msi_setup_remapped_irq(dev, irq, index, sub_handle); + + if (ret < 0) + goto error; + + ret = setup_msi_irq(dev, msidesc, irq, 0); + if (ret < 0) + goto error; + + sub_handle += 1; + irq += 1; + } + + return 0; + +error: + destroy_irq(irq); + return ret; +} + +static int irq_remapping_setup_msi_irqs(struct pci_dev *dev, + int nvec, int type) +{ + if (type == PCI_CAP_ID_MSI) + return do_setup_msi_irqs(dev, nvec); + else + return do_setup_msix_irqs(dev, nvec); +} + static void __init irq_remapping_modify_x86_ops(void) { x86_io_apic_ops.disable = irq_remapping_disable_io_apic; + x86_msi.setup_msi_irqs = irq_remapping_setup_msi_irqs; x86_msi.setup_hpet_msi = setup_hpet_msi_remapped; } @@ -186,7 +292,7 @@ void compose_remapped_msi_msg(struct pci_dev *pdev, remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id); } -int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) +static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) { if (!remap_ops || !remap_ops->msi_alloc_irq) return -ENODEV; @@ -194,8 +300,8 @@ int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) return remap_ops->msi_alloc_irq(pdev, irq, nvec); } -int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, - int index, int sub_handle) +static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, + int index, int sub_handle) { if (!remap_ops || !remap_ops->msi_setup_irq) return -ENODEV; diff --git a/include/linux/irq.h b/include/linux/irq.h index 1eab991..bc4e066 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -509,8 +509,11 @@ static inline void irq_set_percpu_devid_flags(unsigned int irq) /* Handle dynamic irq creation and destruction */ extern unsigned int create_irq_nr(unsigned int irq_want, int node); +extern unsigned int __create_irqs(unsigned int from, unsigned int count, + int node); extern int create_irq(void); extern void destroy_irq(unsigned int irq); +extern void destroy_irqs(unsigned int irq, unsigned int count); /* * Dynamic irq helper functions. Obsolete. Use irq_alloc_desc* and -- cgit v0.10.2 From 373dd7a27f2469020e7b56744cf47b82986b9749 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:39 +0200 Subject: x86, io_apic: Introduce set_affinity function pointer With interrupt remapping a special function is used to change the affinity of an IO-APIC interrupt. Abstract this with a function pointer. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 21aa81e..a744cbb 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -182,6 +182,9 @@ extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned extern void native_disable_io_apic(void); extern void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries); extern void intel_ir_io_apic_print_entries(unsigned int apic, unsigned int nr_entries); +extern int native_ioapic_set_affinity(struct irq_data *, + const struct cpumask *, + bool); static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) { @@ -228,6 +231,7 @@ static inline void disable_ioapic_support(void) { } #define native_io_apic_modify NULL #define native_disable_io_apic NULL #define native_io_apic_print_entries NULL +#define native_ioapic_set_affinity NULL #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 0ee1e88..f1afa04 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -40,9 +40,6 @@ extern int setup_ioapic_remapped_entry(int irq, unsigned int destination, int vector, struct io_apic_irq_attr *attr); -extern int set_remapped_irq_affinity(struct irq_data *data, - const struct cpumask *mask, - bool force); extern void free_remapped_irq(int irq); extern void compose_remapped_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, @@ -68,12 +65,6 @@ static inline int setup_ioapic_remapped_entry(int irq, { return -ENODEV; } -static inline int set_remapped_irq_affinity(struct irq_data *data, - const struct cpumask *mask, - bool force) -{ - return 0; -} static inline void free_remapped_irq(int irq) { } static inline void compose_remapped_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 1ee10ca..20d9f97 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -190,6 +190,9 @@ struct x86_msi_ops { int (*setup_hpet_msi)(unsigned int irq, unsigned int id); }; +struct irq_data; +struct cpumask; + struct x86_io_apic_ops { void (*init) (void); unsigned int (*read) (unsigned int apic, unsigned int reg); @@ -197,6 +200,9 @@ struct x86_io_apic_ops { void (*modify) (unsigned int apic, unsigned int reg, unsigned int value); void (*disable)(void); void (*print_entries)(unsigned int apic, unsigned int nr_entries); + int (*set_affinity)(struct irq_data *data, + const struct cpumask *mask, + bool force); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d4b045e..d9ca3be 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2369,9 +2369,10 @@ int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, return 0; } -static int -ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) + +int native_ioapic_set_affinity(struct irq_data *data, + const struct cpumask *mask, + bool force) { unsigned int dest, irq = data->irq; unsigned long flags; @@ -2570,8 +2571,7 @@ static void irq_remap_modify_chip_defaults(struct irq_chip *chip) chip->irq_print_chip = ir_print_prefix; chip->irq_ack = ir_ack_apic_edge; chip->irq_eoi = ir_ack_apic_level; - - chip->irq_set_affinity = set_remapped_irq_affinity; + chip->irq_set_affinity = x86_io_apic_ops.set_affinity; } #endif /* CONFIG_IRQ_REMAP */ @@ -2582,7 +2582,7 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_unmask = unmask_ioapic_irq, .irq_ack = ack_apic_edge, .irq_eoi = ack_apic_level, - .irq_set_affinity = ioapic_set_affinity, + .irq_set_affinity = native_ioapic_set_affinity, .irq_retrigger = ioapic_retrigger_irq, }; @@ -3694,10 +3694,7 @@ void __init setup_ioapic_dest(void) else mask = apic->target_cpus(); - if (irq_remapping_enabled) - set_remapped_irq_affinity(idata, mask, false); - else - ioapic_set_affinity(idata, mask, false); + x86_io_apic_ops.set_affinity(idata, mask, false); } } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 0357eee..2ca3475 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -126,4 +126,5 @@ struct x86_io_apic_ops x86_io_apic_ops = { .modify = native_io_apic_modify, .disable = native_disable_io_apic, .print_entries = native_io_apic_print_entries, + .set_affinity = native_ioapic_set_affinity, }; diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 20f04b6..67b243c 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -26,6 +26,9 @@ static struct irq_remap_ops *remap_ops; static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec); static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, int index, int sub_handle); +static int set_remapped_irq_affinity(struct irq_data *data, + const struct cpumask *mask, + bool force); static void irq_remapping_disable_io_apic(void) { @@ -142,6 +145,7 @@ static int irq_remapping_setup_msi_irqs(struct pci_dev *dev, static void __init irq_remapping_modify_x86_ops(void) { x86_io_apic_ops.disable = irq_remapping_disable_io_apic; + x86_io_apic_ops.set_affinity = set_remapped_irq_affinity; x86_msi.setup_msi_irqs = irq_remapping_setup_msi_irqs; x86_msi.setup_hpet_msi = setup_hpet_msi_remapped; } -- cgit v0.10.2 From a6a25dd3270944f3c4182ffcbe0f60482471e849 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:40 +0200 Subject: x86, io_apic: Convert setup_ioapic_entry to function pointer This pointer is changed to a different function when IRQ remapping is enabled. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index a744cbb..71f5f08 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -149,6 +149,10 @@ extern int io_apic_set_pci_routing(struct device *dev, int irq, void setup_IO_APIC_irq_extra(u32 gsi); extern void ioapic_insert_resources(void); +extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *, + unsigned int, int, + struct io_apic_irq_attr *); + int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr); extern int save_ioapic_entries(void); @@ -232,6 +236,7 @@ static inline void disable_ioapic_support(void) { } #define native_disable_io_apic NULL #define native_io_apic_print_entries NULL #define native_ioapic_set_affinity NULL +#define native_setup_ioapic_entry NULL #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 20d9f97..17da29c 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -190,6 +190,8 @@ struct x86_msi_ops { int (*setup_hpet_msi)(unsigned int irq, unsigned int id); }; +struct IO_APIC_route_entry; +struct io_apic_irq_attr; struct irq_data; struct cpumask; @@ -203,6 +205,9 @@ struct x86_io_apic_ops { int (*set_affinity)(struct irq_data *data, const struct cpumask *mask, bool force); + int (*setup_entry)(int irq, struct IO_APIC_route_entry *entry, + unsigned int destination, int vector, + struct io_apic_irq_attr *attr); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d9ca3be..9a7131f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1315,14 +1315,10 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, fasteoi ? "fasteoi" : "edge"); } -static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, - unsigned int destination, int vector, - struct io_apic_irq_attr *attr) +int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, + unsigned int destination, int vector, + struct io_apic_irq_attr *attr) { - if (irq_remapping_enabled) - return setup_ioapic_remapped_entry(irq, entry, destination, - vector, attr); - memset(entry, 0, sizeof(*entry)); entry->delivery_mode = apic->irq_delivery_mode; @@ -1370,8 +1366,8 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin, cfg->vector, irq, attr->trigger, attr->polarity, dest); - if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) { - pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", + if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) { + pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); __clear_irq_vector(irq, cfg); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 2ca3475..06db44f 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -127,4 +127,5 @@ struct x86_io_apic_ops x86_io_apic_ops = { .disable = native_disable_io_apic, .print_entries = native_io_apic_print_entries, .set_affinity = native_ioapic_set_affinity, + .setup_entry = native_setup_ioapic_entry, }; diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 67b243c..886ad67 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -146,6 +146,7 @@ static void __init irq_remapping_modify_x86_ops(void) { x86_io_apic_ops.disable = irq_remapping_disable_io_apic; x86_io_apic_ops.set_affinity = set_remapped_irq_affinity; + x86_io_apic_ops.setup_entry = setup_ioapic_remapped_entry; x86_msi.setup_msi_irqs = irq_remapping_setup_msi_irqs; x86_msi.setup_hpet_msi = setup_hpet_msi_remapped; } -- cgit v0.10.2 From 6a9f5de27216801b4e38ccd8aa0168a5dd8eca9b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:41 +0200 Subject: x86, io_apic: Move irq_remapping_enabled checks out of check_timer() Move these checks to IRQ remapping code by introducing the panic_on_irq_remap() function. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index f1afa04..fb99a73 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -45,6 +45,7 @@ extern void compose_remapped_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id); extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id); +extern void panic_if_irq_remap(const char *msg); #else /* CONFIG_IRQ_REMAP */ @@ -75,6 +76,10 @@ static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) { return -ENODEV; } + +static inline void panic_if_irq_remap(const char *msg) +{ +} #endif /* CONFIG_IRQ_REMAP */ #endif /* __X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9a7131f..aa2b753 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2777,8 +2777,7 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { - if (irq_remapping_enabled) - panic("BIOS bug: timer not connected to IO-APIC"); + panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC"); pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -2810,8 +2809,7 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } - if (irq_remapping_enabled) - panic("timer doesn't work through Interrupt-remapped IO-APIC"); + panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC"); local_irq_disable(); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 886ad67..ebd02bf 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -321,3 +321,9 @@ int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) return remap_ops->setup_hpet_msi(irq, id); } + +void panic_if_irq_remap(const char *msg) +{ + if (irq_remapping_enabled) + panic(msg); +} -- cgit v0.10.2 From 1d254428c0ba30a0fbb8112d875ba64f4e60db25 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:42 +0200 Subject: x86, io_apic: Remove irq_remapping_enabled check in setup_timer_IRQ0_pin This function is only called when irq-remapping is disabled. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index aa2b753..ee0757d 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1475,9 +1475,6 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, struct IO_APIC_route_entry entry; unsigned int dest; - if (irq_remapping_enabled) - return; - memset(&entry, 0, sizeof(entry)); /* -- cgit v0.10.2 From 078e1ee26a061663bd7a4773c06b33cdb997380d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:43 +0200 Subject: x86, irq: Move irq_remapping_enabled declaration to iommu code Remove the last left-over from this flag from x86 code. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index fb99a73..6f4b48b 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -26,8 +26,6 @@ #ifdef CONFIG_IRQ_REMAP -extern int irq_remapping_enabled; - extern void setup_irq_remapping_ops(void); extern int irq_remapping_supported(void); extern int irq_remapping_prepare(void); @@ -49,8 +47,6 @@ extern void panic_if_irq_remap(const char *msg); #else /* CONFIG_IRQ_REMAP */ -#define irq_remapping_enabled 0 - static inline void setup_irq_remapping_ops(void) { } static inline int irq_remapping_supported(void) { return 0; } static inline int irq_remapping_prepare(void) { return -ENODEV; } diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 86e2f4a..174bb65 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -41,6 +41,8 @@ #include #include +#include "irq_remapping.h" + /* No locks are needed as DMA remapping hardware unit * list is constructed at boot time and hotplug of * these units are not supported by the architecture. diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index b9d0911..64ae948 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -46,6 +46,8 @@ #include #include +#include "irq_remapping.h" + #define ROOT_SIZE VTD_PAGE_SIZE #define CONTEXT_SIZE VTD_PAGE_SIZE diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index 95363ac..ecb6376 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -34,6 +34,7 @@ struct msi_msg; extern int disable_irq_remap; extern int disable_sourceid_checking; extern int no_x2apic_optout; +extern int irq_remapping_enabled; struct irq_remap_ops { /* Check whether Interrupt Remapping is supported */ -- cgit v0.10.2 From 819508d302e5b6d6dacb5c3d5e4756091e32cc7d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:44 +0200 Subject: x86, irq: Add data structure to keep AMD specific irq remapping information Add a data structure to store information the IOMMU driver can use to get from a 'struct irq_cfg' to the remapping entry. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index eb92a6e..fc89a2a 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -101,6 +101,7 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, irq_attr->polarity = polarity; } +/* Intel specific interrupt remapping information */ struct irq_2_iommu { struct intel_iommu *iommu; u16 irte_index; @@ -108,6 +109,12 @@ struct irq_2_iommu { u8 irte_mask; }; +/* AMD specific interrupt remapping information */ +struct irq_2_irte { + u16 devid; /* Device ID for IRTE table */ + u16 index; /* Index into IRTE table*/ +}; + /* * This is performance-critical, we want to do it O(1) * @@ -120,7 +127,10 @@ struct irq_cfg { u8 vector; u8 move_in_progress : 1; #ifdef CONFIG_IRQ_REMAP - struct irq_2_iommu irq_2_iommu; + union { + struct irq_2_iommu irq_2_iommu; + struct irq_2_irte irq_2_irte; + }; #endif }; -- cgit v0.10.2 From 9b1b0e42f54bc452817f4bb6a8d939afe4f04303 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:45 +0200 Subject: x86, io-apic: Move CONFIG_IRQ_REMAP code out of x86 core Move all the code to either to the header file asm/irq_remapping.h or to drivers/iommu/. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index fc89a2a..10a78c3 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -127,6 +127,7 @@ struct irq_cfg { u8 vector; u8 move_in_progress : 1; #ifdef CONFIG_IRQ_REMAP + u8 remapped : 1; union { struct irq_2_iommu irq_2_iommu; struct irq_2_irte irq_2_irte; diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 71f5f08..36fb5ab 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -144,6 +144,7 @@ extern int timer_through_8259; (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) struct io_apic_irq_attr; +struct irq_cfg; extern int io_apic_set_pci_routing(struct device *dev, int irq, struct io_apic_irq_attr *irq_attr); void setup_IO_APIC_irq_extra(u32 gsi); @@ -152,6 +153,10 @@ extern void ioapic_insert_resources(void); extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *, unsigned int, int, struct io_apic_irq_attr *); +extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *, + unsigned int, int, + struct io_apic_irq_attr *); +extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg); int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr); diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 6f4b48b..562db68 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -45,6 +45,13 @@ extern void compose_remapped_msi_msg(struct pci_dev *pdev, extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id); extern void panic_if_irq_remap(const char *msg); +static inline bool irq_remapped(struct irq_cfg *cfg) +{ + return (cfg->remapped == 1); +} + +void irq_remap_modify_chip_defaults(struct irq_chip *chip); + #else /* CONFIG_IRQ_REMAP */ static inline void setup_irq_remapping_ops(void) { } @@ -76,6 +83,16 @@ static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) static inline void panic_if_irq_remap(const char *msg) { } + +static inline bool irq_remapped(struct irq_cfg *cfg) +{ + return false; +} + +static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) +{ +} + #endif /* CONFIG_IRQ_REMAP */ #endif /* __X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ee0757d..0fd5f30 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -68,22 +68,6 @@ #define for_each_irq_pin(entry, head) \ for (entry = head; entry; entry = entry->next) -#ifdef CONFIG_IRQ_REMAP -static void irq_remap_modify_chip_defaults(struct irq_chip *chip); -static inline bool irq_remapped(struct irq_cfg *cfg) -{ - return cfg->irq_2_iommu.iommu != NULL; -} -#else -static inline bool irq_remapped(struct irq_cfg *cfg) -{ - return false; -} -static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) -{ -} -#endif - /* * Is the SiS APIC rmw bug present ? * -1 = don't know, 0 = no, 1 = yes @@ -606,7 +590,7 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg) } } -static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) { struct irq_pin_list *entry; unsigned long flags; @@ -2542,32 +2526,6 @@ static void ack_apic_level(struct irq_data *data) ioapic_irqd_unmask(data, cfg, masked); } -#ifdef CONFIG_IRQ_REMAP -static void ir_ack_apic_edge(struct irq_data *data) -{ - ack_APIC_irq(); -} - -static void ir_ack_apic_level(struct irq_data *data) -{ - ack_APIC_irq(); - eoi_ioapic_irq(data->irq, data->chip_data); -} - -static void ir_print_prefix(struct irq_data *data, struct seq_file *p) -{ - seq_printf(p, " IR-%s", data->chip->name); -} - -static void irq_remap_modify_chip_defaults(struct irq_chip *chip) -{ - chip->irq_print_chip = ir_print_prefix; - chip->irq_ack = ir_ack_apic_edge; - chip->irq_eoi = ir_ack_apic_level; - chip->irq_set_affinity = x86_io_apic_ops.set_affinity; -} -#endif /* CONFIG_IRQ_REMAP */ - static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", .irq_startup = startup_ioapic_irq, diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index c1c74e0..d33eaaf 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4017,10 +4017,10 @@ static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count) index -= count - 1; + cfg->remapped = 1; irte_info = &cfg->irq_2_iommu; irte_info->sub_handle = devid; irte_info->irte_index = index; - irte_info->iommu = (void *)cfg; goto out; } @@ -4127,9 +4127,9 @@ static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, index = attr->ioapic_pin; /* Setup IRQ remapping info */ + cfg->remapped = 1; irte_info->sub_handle = devid; irte_info->irte_index = index; - irte_info->iommu = (void *)cfg; /* Setup IRTE for IOMMU */ irte.val = 0; @@ -4288,9 +4288,9 @@ static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq, devid = get_device_id(&pdev->dev); irte_info = &cfg->irq_2_iommu; + cfg->remapped = 1; irte_info->sub_handle = devid; irte_info->irte_index = index + offset; - irte_info->iommu = (void *)cfg; return 0; } @@ -4314,9 +4314,9 @@ static int setup_hpet_msi(unsigned int irq, unsigned int id) if (index < 0) return index; + cfg->remapped = 1; irte_info->sub_handle = devid; irte_info->irte_index = index; - irte_info->iommu = (void *)cfg; return 0; } diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index cb9a72d..7ca4947 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -68,6 +68,7 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) { struct ir_table *table = iommu->ir_table; struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); + struct irq_cfg *cfg = irq_get_chip_data(irq); u16 index, start_index; unsigned int mask = 0; unsigned long flags; @@ -115,6 +116,7 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) for (i = index; i < index + count; i++) table->base[i].present = 1; + cfg->remapped = 1; irq_iommu->iommu = iommu; irq_iommu->irte_index = index; irq_iommu->sub_handle = 0; @@ -155,6 +157,7 @@ static int map_irq_to_irte_handle(int irq, u16 *sub_handle) static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) { struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); + struct irq_cfg *cfg = irq_get_chip_data(irq); unsigned long flags; if (!irq_iommu) @@ -162,6 +165,7 @@ static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subha raw_spin_lock_irqsave(&irq_2_ir_lock, flags); + cfg->remapped = 1; irq_iommu->iommu = iommu; irq_iommu->irte_index = index; irq_iommu->sub_handle = subhandle; diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index ebd02bf..75afdf4 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -327,3 +328,27 @@ void panic_if_irq_remap(const char *msg) if (irq_remapping_enabled) panic(msg); } + +static void ir_ack_apic_edge(struct irq_data *data) +{ + ack_APIC_irq(); +} + +static void ir_ack_apic_level(struct irq_data *data) +{ + ack_APIC_irq(); + eoi_ioapic_irq(data->irq, data->chip_data); +} + +static void ir_print_prefix(struct irq_data *data, struct seq_file *p) +{ + seq_printf(p, " IR-%s", data->chip->name); +} + +void irq_remap_modify_chip_defaults(struct irq_chip *chip) +{ + chip->irq_print_chip = ir_print_prefix; + chip->irq_ack = ir_ack_apic_edge; + chip->irq_eoi = ir_ack_apic_level; + chip->irq_set_affinity = x86_io_apic_ops.set_affinity; +} -- cgit v0.10.2 From 9f9d39e403faf5e1a22334fe4df96516e4f389a8 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:46 +0200 Subject: x86, io-apic: Remove !irq_remapped() check from __target_IO_APIC_irq() This function is only called from default_ioapic_set_affinity() which is only used when interrupt remapping is disabled since the introduction of the set_affinity function pointer. So the check will always evaluate as true and can be removed. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 0fd5f30..5b7eb70 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2299,12 +2299,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq apic = entry->apic; pin = entry->pin; - /* - * With interrupt-remapping, destination information comes - * from interrupt-remapping table entry. - */ - if (!irq_remapped(cfg)) - io_apic_write(apic, 0x11 + pin*2, dest); + + io_apic_write(apic, 0x11 + pin*2, dest); reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; -- cgit v0.10.2 From 11b4a1cc3836ac71a214446d350e923c76012368 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:47 +0200 Subject: x86, irq: Move irq_remapped() check into free_remapped_irq The function is called unconditionally now in IO-APIC code removing another irq_remapped() check from x86 core code. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5b7eb70..1104839 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3003,8 +3003,8 @@ void destroy_irq(unsigned int irq) irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); - if (irq_remapped(cfg)) - free_remapped_irq(irq); + free_remapped_irq(irq); + raw_spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq, cfg); raw_spin_unlock_irqrestore(&vector_lock, flags); diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 75afdf4..cff0478 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -282,10 +282,13 @@ int set_remapped_irq_affinity(struct irq_data *data, const struct cpumask *mask, void free_remapped_irq(int irq) { + struct irq_cfg *cfg = irq_get_chip_data(irq); + if (!remap_ops || !remap_ops->free_irq) return; - remap_ops->free_irq(irq); + if (irq_remapped(cfg)) + remap_ops->free_irq(irq); } void compose_remapped_msi_msg(struct pci_dev *pdev, -- cgit v0.10.2 From 2976fd8417f5744de3bb9109e4f30f353a36b1c0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:48 +0200 Subject: x86, irq: Introduce setup_remapped_irq() This function does irq-remapping specific interrupt setup like modifying the chip defaults. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 562db68..b30fca1 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -44,6 +44,9 @@ extern void compose_remapped_msi_msg(struct pci_dev *pdev, struct msi_msg *msg, u8 hpet_id); extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id); extern void panic_if_irq_remap(const char *msg); +extern bool setup_remapped_irq(int irq, + struct irq_cfg *cfg, + struct irq_chip *chip); static inline bool irq_remapped(struct irq_cfg *cfg) { @@ -93,6 +96,12 @@ static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) { } +static inline bool setup_remapped_irq(int irq, + struct irq_cfg *cfg, + struct irq_chip *chip) +{ + return false; +} #endif /* CONFIG_IRQ_REMAP */ #endif /* __X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1104839..3725122 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1288,11 +1288,8 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, fasteoi = false; } - if (irq_remapped(cfg)) { - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - irq_remap_modify_chip_defaults(chip); + if (setup_remapped_irq(irq, cfg, chip)) fasteoi = trigger != 0; - } hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; irq_set_chip_and_handler_name(irq, chip, hdl, @@ -3131,10 +3128,7 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, if (!irq_offset) write_msi_msg(irq, &msg); - if (irq_remapped(irq_get_chip_data(irq))) { - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - irq_remap_modify_chip_defaults(chip); - } + setup_remapped_irq(irq, irq_get_chip_data(irq), chip); irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); @@ -3272,8 +3266,7 @@ int default_setup_hpet_msi(unsigned int irq, unsigned int id) hpet_msi_write(irq_get_handler_data(irq), &msg); irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - if (irq_remapped(irq_get_chip_data(irq))) - irq_remap_modify_chip_defaults(chip); + setup_remapped_irq(irq, irq_get_chip_data(irq), chip); irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); return 0; diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index cff0478..339260c 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -355,3 +355,12 @@ void irq_remap_modify_chip_defaults(struct irq_chip *chip) chip->irq_eoi = ir_ack_apic_level; chip->irq_set_affinity = x86_io_apic_ops.set_affinity; } + +bool setup_remapped_irq(int irq, struct irq_cfg *cfg, struct irq_chip *chip) +{ + if (!irq_remapped(cfg)) + return false; + irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); + irq_remap_modify_chip_defaults(chip); + return true; +} -- cgit v0.10.2 From 7601384f91be1a5ea60cb4ef6e28cad628e6cd1e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:49 +0200 Subject: x86, msi: Introduce x86_msi.compose_msi_msg call-back This call-back points to the right function for initializing the msi_msg structure. The old code for msi_msg generation was split up into the irq-remapped and the default case. The irq-remapped case just calls into the specific Intel or AMD implementation when the device is behind an IOMMU. Otherwise the default function is called. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 36fb5ab..1838e88 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -158,6 +158,9 @@ extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *, struct io_apic_irq_attr *); extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg); +extern void native_compose_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id); int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr); extern int save_ioapic_entries(void); @@ -242,6 +245,7 @@ static inline void disable_ioapic_support(void) { } #define native_io_apic_print_entries NULL #define native_ioapic_set_affinity NULL #define native_setup_ioapic_entry NULL +#define native_compose_msi_msg NULL #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 17da29c..c9f87be 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -181,9 +181,13 @@ struct x86_platform_ops { }; struct pci_dev; +struct msi_msg; struct x86_msi_ops { int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); + void (*compose_msi_msg)(struct pci_dev *dev, unsigned int irq, + unsigned int dest, struct msi_msg *msg, + u8 hpet_id); void (*teardown_msi_irq)(unsigned int irq); void (*teardown_msi_irqs)(struct pci_dev *dev); void (*restore_msi_irqs)(struct pci_dev *dev, int irq); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 3725122..b832810 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3019,37 +3019,16 @@ void destroy_irqs(unsigned int irq, unsigned int count) /* * MSI message composition */ -#ifdef CONFIG_PCI_MSI -static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, - struct msi_msg *msg, u8 hpet_id) +void native_compose_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id) { - struct irq_cfg *cfg; - int err; - unsigned dest; + struct irq_cfg *cfg = irq_cfg(irq); - if (disable_apic) - return -ENXIO; - - cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, apic->target_cpus()); - if (err) - return err; - - err = apic->cpu_mask_to_apicid_and(cfg->domain, - apic->target_cpus(), &dest); - if (err) - return err; - - if (irq_remapped(cfg)) { - compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id); - return 0; - } + msg->address_hi = MSI_ADDR_BASE_HI; if (x2apic_enabled()) - msg->address_hi = MSI_ADDR_BASE_HI | - MSI_ADDR_EXT_DEST_ID(dest); - else - msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest); msg->address_lo = MSI_ADDR_BASE_LO | @@ -3068,6 +3047,30 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, MSI_DATA_DELIVERY_FIXED: MSI_DATA_DELIVERY_LOWPRI) | MSI_DATA_VECTOR(cfg->vector); +} + +#ifdef CONFIG_PCI_MSI +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, + struct msi_msg *msg, u8 hpet_id) +{ + struct irq_cfg *cfg; + int err; + unsigned dest; + + if (disable_apic) + return -ENXIO; + + cfg = irq_cfg(irq); + err = assign_irq_vector(irq, cfg, apic->target_cpus()); + if (err) + return err; + + err = apic->cpu_mask_to_apicid_and(cfg->domain, + apic->target_cpus(), &dest); + if (err) + return err; + + x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id); return 0; } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 06db44f..ee4a17c 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -113,6 +113,7 @@ struct x86_platform_ops x86_platform = { EXPORT_SYMBOL_GPL(x86_platform); struct x86_msi_ops x86_msi = { .setup_msi_irqs = native_setup_msi_irqs, + .compose_msi_msg = native_compose_msi_msg, .teardown_msi_irq = native_teardown_msi_irq, .teardown_msi_irqs = default_teardown_msi_irqs, .restore_msi_irqs = default_restore_msi_irqs, diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 339260c..158091b3 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -150,6 +150,7 @@ static void __init irq_remapping_modify_x86_ops(void) x86_io_apic_ops.setup_entry = setup_ioapic_remapped_entry; x86_msi.setup_msi_irqs = irq_remapping_setup_msi_irqs; x86_msi.setup_hpet_msi = setup_hpet_msi_remapped; + x86_msi.compose_msi_msg = compose_remapped_msi_msg; } static __init int setup_nointremap(char *str) @@ -295,10 +296,12 @@ void compose_remapped_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id) { - if (!remap_ops || !remap_ops->compose_msi_msg) - return; + struct irq_cfg *cfg = irq_get_chip_data(irq); - remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id); + if (!irq_remapped(cfg)) + native_compose_msi_msg(pdev, irq, dest, msg, hpet_id); + else if (remap_ops && remap_ops->compose_msi_msg) + remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id); } static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) -- cgit v0.10.2 From da165322dfb6cbc50042b1051f07b837a26f3bb8 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:50 +0200 Subject: x86, io_apic: Introduce eoi_ioapic_pin call-back This callback replaces the old __eoi_ioapic_pin function which needs a special path for interrupt remapping. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 1838e88..459e50a 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -161,6 +161,7 @@ extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg); extern void native_compose_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id); +extern void native_eoi_ioapic_pin(int apic, int pin, int vector); int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr); extern int save_ioapic_entries(void); @@ -211,6 +212,9 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned { x86_io_apic_ops.modify(apic, reg, value); } + +extern void io_apic_eoi(unsigned int apic, unsigned int vector); + #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 @@ -246,6 +250,7 @@ static inline void disable_ioapic_support(void) { } #define native_ioapic_set_affinity NULL #define native_setup_ioapic_entry NULL #define native_compose_msi_msg NULL +#define native_eoi_ioapic_pin NULL #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index c9f87be..7669941 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -212,6 +212,7 @@ struct x86_io_apic_ops { int (*setup_entry)(int irq, struct IO_APIC_route_entry *entry, unsigned int destination, int vector, struct io_apic_irq_attr *attr); + void (*eoi_ioapic_pin)(int apic, int pin, int vector); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index b832810..9ed796c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -310,7 +310,7 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) + (mpc_ioapic_addr(idx) & ~PAGE_MASK); } -static inline void io_apic_eoi(unsigned int apic, unsigned int vector) +void io_apic_eoi(unsigned int apic, unsigned int vector) { struct io_apic __iomem *io_apic = io_apic_base(apic); writel(vector, &io_apic->eoi); @@ -557,19 +557,10 @@ static void unmask_ioapic_irq(struct irq_data *data) * Otherwise, we simulate the EOI message manually by changing the trigger * mode to edge and then back to level, with RTE being masked during this. */ -static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg) +void native_eoi_ioapic_pin(int apic, int pin, int vector) { if (mpc_ioapic_ver(apic) >= 0x20) { - /* - * Intr-remapping uses pin number as the virtual vector - * in the RTE. Actual vector is programmed in - * intr-remapping table entry. Hence for the io-apic - * EOI we use the pin number. - */ - if (cfg && irq_remapped(cfg)) - io_apic_eoi(apic, pin); - else - io_apic_eoi(apic, vector); + io_apic_eoi(apic, vector); } else { struct IO_APIC_route_entry entry, entry1; @@ -597,7 +588,8 @@ void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) raw_spin_lock_irqsave(&ioapic_lock, flags); for_each_irq_pin(entry, cfg->irq_2_pin) - __eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg); + x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin, + cfg->vector); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -634,7 +626,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) } raw_spin_lock_irqsave(&ioapic_lock, flags); - __eoi_ioapic_pin(apic, pin, entry.vector, NULL); + x86_io_apic_ops.eoi_ioapic_pin(apic, pin, entry.vector); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index ee4a17c..d065d67 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -129,4 +129,5 @@ struct x86_io_apic_ops x86_io_apic_ops = { .print_entries = native_io_apic_print_entries, .set_affinity = native_ioapic_set_affinity, .setup_entry = native_setup_ioapic_entry, + .eoi_ioapic_pin = native_eoi_ioapic_pin, }; diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 158091b3..849ce4c 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -143,11 +143,23 @@ static int irq_remapping_setup_msi_irqs(struct pci_dev *dev, return do_setup_msix_irqs(dev, nvec); } +void eoi_ioapic_pin_remapped(int apic, int pin, int vector) +{ + /* + * Intr-remapping uses pin number as the virtual vector + * in the RTE. Actual vector is programmed in + * intr-remapping table entry. Hence for the io-apic + * EOI we use the pin number. + */ + io_apic_eoi(apic, pin); +} + static void __init irq_remapping_modify_x86_ops(void) { x86_io_apic_ops.disable = irq_remapping_disable_io_apic; x86_io_apic_ops.set_affinity = set_remapped_irq_affinity; x86_io_apic_ops.setup_entry = setup_ioapic_remapped_entry; + x86_io_apic_ops.eoi_ioapic_pin = eoi_ioapic_pin_remapped; x86_msi.setup_msi_irqs = irq_remapping_setup_msi_irqs; x86_msi.setup_hpet_msi = setup_hpet_msi_remapped; x86_msi.compose_msi_msg = compose_remapped_msi_msg; -- cgit v0.10.2 From a1bb20c232d066de0762f8e7cf332e5ce8385210 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:51 +0200 Subject: x86, irq: Move irq_remapped out of x86 core code The irq_remapped function is only used in IOMMU code after the last patch. So move its definition there too. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index b30fca1..95fd352 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -48,11 +48,6 @@ extern bool setup_remapped_irq(int irq, struct irq_cfg *cfg, struct irq_chip *chip); -static inline bool irq_remapped(struct irq_cfg *cfg) -{ - return (cfg->remapped == 1); -} - void irq_remap_modify_chip_defaults(struct irq_chip *chip); #else /* CONFIG_IRQ_REMAP */ @@ -87,11 +82,6 @@ static inline void panic_if_irq_remap(const char *msg) { } -static inline bool irq_remapped(struct irq_cfg *cfg) -{ - return false; -} - static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) { } diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 849ce4c..d56f8c1 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -31,6 +31,11 @@ static int set_remapped_irq_affinity(struct irq_data *data, const struct cpumask *mask, bool force); +static bool irq_remapped(struct irq_cfg *cfg) +{ + return (cfg->remapped == 1); +} + static void irq_remapping_disable_io_apic(void) { /* -- cgit v0.10.2 From 3b4a505821615b6c055536a0c23ea37c349bb6a9 Mon Sep 17 00:00:00 2001 From: Alok N Kataria Date: Mon, 28 Jan 2013 18:59:12 -0800 Subject: x86, kvm: Fix intialization warnings in kvm.c With commit: 4cca6ea04d31 ("x86/apic: Allow x2apic without IR on VMware platform") we started seeing "incompatible initialization" warning messages, since x2apic_available() expects a bool return type while kvm_para_available() returns an int. Reported by: kbuild test robot Signed-off-by: Alok N Kataria Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 5ed1f161..65231e1 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -85,13 +85,13 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, return ret; } -static inline int kvm_para_available(void) +static inline bool kvm_para_available(void) { unsigned int eax, ebx, ecx, edx; char signature[13]; if (boot_cpu_data.cpuid_level < 0) - return 0; /* So we don't blow up on old processors */ + return false; /* So we don't blow up on old processors */ if (cpu_has_hypervisor) { cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); @@ -101,10 +101,10 @@ static inline int kvm_para_available(void) signature[12] = 0; if (strcmp(signature, "KVMKVMKVM") == 0) - return 1; + return true; } - return 0; + return false; } static inline unsigned int kvm_arch_para_features(void) -- cgit v0.10.2 From af8d102f999a41c0189bd2cce488bac2ee88c29b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 1 Feb 2013 14:57:43 -0800 Subject: x86/intel/irq_remapping: Clean up x2apic opt-out security warning mess Current kernels print this on my Dell server: ------------[ cut here ]------------ WARNING: at drivers/iommu/intel_irq_remapping.c:542 intel_enable_irq_remapping+0x7b/0x27e() Hardware name: PowerEdge R620 Your BIOS is broken and requested that x2apic be disabled This will leave your machine vulnerable to irq-injection attacks Use 'intremap=no_x2apic_optout' to override BIOS request [...] Enabled IRQ remapping in xapic mode x2apic not enabled, IRQ remapping is in xapic mode This is inconsistent with itself -- interrupt remapping is *on*. Fix the mess by making the warnings say what they mean and my making sure that compatibility format interrupts (the dangerous ones) are disabled if x2apic is present regardless of BIOS settings. With this patch applied, the output is: Your BIOS is broken and requested that x2apic be disabled. This will slightly decrease performance. Use 'intremap=no_x2apic_optout' to override BIOS request. Enabled IRQ remapping in xapic mode x2apic not enabled, IRQ remapping is in xapic mode This should make us as or more secure than we are now and replace a rather scary warning with a much less scary warning on silly but functional systems. Signed-off-by: Andy Lutomirski Cc: Suresh Siddha Cc: Prarit Bhargava Cc: Gleb Natapov Cc: Don Zickus Cc: Alex Williamson Link: http://lkml.kernel.org/r/2011b943a886fd7c46079eb10bc24fc130587503.1359759303.git.luto@amacapital.net Signed-off-by: Ingo Molnar diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 7ca4947..f3b8f23 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -429,11 +429,22 @@ static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode) /* Enable interrupt-remapping */ iommu->gcmd |= DMA_GCMD_IRE; + iommu->gcmd &= ~DMA_GCMD_CFI; /* Block compatibility-format MSIs */ writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_IRES), sts); + /* + * With CFI clear in the Global Command register, we should be + * protected from dangerous (i.e. compatibility) interrupts + * regardless of x2apic status. Check just to be sure. + */ + if (sts & DMA_GSTS_CFIS) + WARN(1, KERN_WARNING + "Compatibility-format IRQs enabled despite intr remapping;\n" + "you are vulnerable to IRQ injection.\n"); + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); } @@ -530,20 +541,24 @@ static int __init intel_irq_remapping_supported(void) static int __init intel_enable_irq_remapping(void) { struct dmar_drhd_unit *drhd; + bool x2apic_present; int setup = 0; int eim = 0; + x2apic_present = x2apic_supported(); + if (parse_ioapics_under_ir() != 1) { printk(KERN_INFO "Not enable interrupt remapping\n"); - return -1; + goto error; } - if (x2apic_supported()) { + if (x2apic_present) { eim = !dmar_x2apic_optout(); - WARN(!eim, KERN_WARNING - "Your BIOS is broken and requested that x2apic be disabled\n" - "This will leave your machine vulnerable to irq-injection attacks\n" - "Use 'intremap=no_x2apic_optout' to override BIOS request\n"); + if (!eim) + printk(KERN_WARNING + "Your BIOS is broken and requested that x2apic be disabled.\n" + "This will slightly decrease performance.\n" + "Use 'intremap=no_x2apic_optout' to override BIOS request.\n"); } for_each_drhd_unit(drhd) { @@ -582,7 +597,7 @@ static int __init intel_enable_irq_remapping(void) if (eim && !ecap_eim_support(iommu->ecap)) { printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, " " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap); - return -1; + goto error; } } @@ -598,7 +613,7 @@ static int __init intel_enable_irq_remapping(void) printk(KERN_ERR "DRHD %Lx: failed to enable queued, " " invalidation, ecap %Lx, ret %d\n", drhd->reg_base_addr, iommu->ecap, ret); - return -1; + goto error; } } @@ -637,6 +652,11 @@ error: /* * handle error condition gracefully here! */ + + if (x2apic_present) + WARN(1, KERN_WARNING + "Failed to enable irq remapping. You are vulnerable to irq-injection attacks.\n"); + return -1; } -- cgit v0.10.2