From 83ce6ef8408bbc7d9322ab50ba592f83012dea94 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 28 Jun 2007 16:05:34 -0700 Subject: [IA64] Don't set psr.ic and psr.i simultaneously It's not a good idea to use "ssm psr.ic | psr.i" to simultaneously enable interrupts and interrupt state collection, the two bits can take effect asynchronously, so it is possible for an interrupt to be serviced while psr.ic is still zero. Signed-off-by: Tony Luck diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S index f2d4900..3bccb06 100644 --- a/arch/ia64/kernel/mca_drv_asm.S +++ b/arch/ia64/kernel/mca_drv_asm.S @@ -40,7 +40,11 @@ GLOBAL_ENTRY(mca_handler_bhhook) mov b6=loc1 ;; mov loc1=rp - ssm psr.i | psr.ic + ssm psr.ic + ;; + srlz.i + ;; + ssm psr.i br.call.sptk.many rp=b6 // does not return ... ;; mov ar.pfs=loc0 -- cgit v0.10.2 From d7ad2254fa7cc11aec3faeba076c1243f6adeb47 Mon Sep 17 00:00:00 2001 From: John Keller Date: Mon, 9 Jul 2007 11:42:24 -0700 Subject: [IA64] SN: Correct ROM resource length for BIOS copy On SN systems, when setting the IORESOURCE_ROM_BIOS_COPY resource flag, the resource length should be set to the actual size of the ROM image so that a call to pci_map_rom() returns the correct size. Signed-off-by: John Keller Signed-off-by: Andrew Morton Signed-off-by: Tony Luck diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c index c6216f4..3c7178f 100644 --- a/arch/ia64/sn/kernel/io_acpi_init.c +++ b/arch/ia64/sn/kernel/io_acpi_init.c @@ -418,7 +418,7 @@ sn_acpi_slot_fixup(struct pci_dev *dev) void __iomem *addr; struct pcidev_info *pcidev_info = NULL; struct sn_irq_info *sn_irq_info = NULL; - size_t size; + size_t image_size, size; if (sn_acpi_get_pcidev_info(dev, &pcidev_info, &sn_irq_info)) { panic("%s: Failure obtaining pcidev_info for %s\n", @@ -428,17 +428,16 @@ sn_acpi_slot_fixup(struct pci_dev *dev) if (pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE]) { /* * A valid ROM image exists and has been shadowed by the - * PROM. Setup the pci_dev ROM resource to point to - * the shadowed copy. + * PROM. Setup the pci_dev ROM resource with the address + * of the shadowed copy, and the actual length of the ROM image. */ - size = dev->resource[PCI_ROM_RESOURCE].end - - dev->resource[PCI_ROM_RESOURCE].start; - addr = - ioremap(pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE], - size); + size = pci_resource_len(dev, PCI_ROM_RESOURCE); + addr = ioremap(pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE], + size); + image_size = pci_get_rom_size(addr, size); dev->resource[PCI_ROM_RESOURCE].start = (unsigned long) addr; dev->resource[PCI_ROM_RESOURCE].end = - (unsigned long) addr + size; + (unsigned long) addr + image_size - 1; dev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_BIOS_COPY; } sn_pci_fixup_slot(dev, pcidev_info, sn_irq_info); diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 6b10e5d..906b936 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -259,9 +259,23 @@ sn_io_slot_fixup(struct pci_dev *dev) insert_resource(&ioport_resource, &dev->resource[idx]); else insert_resource(&iomem_resource, &dev->resource[idx]); - /* If ROM, mark as shadowed in PROM */ - if (idx == PCI_ROM_RESOURCE) - dev->resource[idx].flags |= IORESOURCE_ROM_BIOS_COPY; + /* + * If ROM, set the actual ROM image size, and mark as + * shadowed in PROM. + */ + if (idx == PCI_ROM_RESOURCE) { + size_t image_size; + void __iomem *rom; + + rom = ioremap(pci_resource_start(dev, PCI_ROM_RESOURCE), + size + 1); + image_size = pci_get_rom_size(rom, size + 1); + dev->resource[PCI_ROM_RESOURCE].end = + dev->resource[PCI_ROM_RESOURCE].start + + image_size - 1; + dev->resource[PCI_ROM_RESOURCE].flags |= + IORESOURCE_ROM_BIOS_COPY; + } } /* Create a pci_window in the pci_controller struct for * each device resource. diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c index d087e08..dbbcc04 100644 --- a/drivers/pci/rom.c +++ b/drivers/pci/rom.c @@ -54,6 +54,49 @@ static void pci_disable_rom(struct pci_dev *pdev) } /** + * pci_get_rom_size - obtain the actual size of the ROM image + * @rom: kernel virtual pointer to image of ROM + * @size: size of PCI window + * return: size of actual ROM image + * + * Determine the actual length of the ROM image. + * The PCI window size could be much larger than the + * actual image size. + */ +size_t pci_get_rom_size(void __iomem *rom, size_t size) +{ + void __iomem *image; + int last_image; + + image = rom; + do { + void __iomem *pds; + /* Standard PCI ROMs start out with these bytes 55 AA */ + if (readb(image) != 0x55) + break; + if (readb(image + 1) != 0xAA) + break; + /* get the PCI data structure and check its signature */ + pds = image + readw(image + 24); + if (readb(pds) != 'P') + break; + if (readb(pds + 1) != 'C') + break; + if (readb(pds + 2) != 'I') + break; + if (readb(pds + 3) != 'R') + break; + last_image = readb(pds + 21) & 0x80; + /* this length is reliable */ + image += readw(pds + 16) * 512; + } while (!last_image); + + /* never return a size larger than the PCI resource window */ + /* there are known ROMs that get the size wrong */ + return min((size_t)(image - rom), size); +} + +/** * pci_map_rom - map a PCI ROM to kernel space * @pdev: pointer to pci device struct * @size: pointer to receive size of pci window over ROM @@ -68,8 +111,6 @@ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size) struct resource *res = &pdev->resource[PCI_ROM_RESOURCE]; loff_t start; void __iomem *rom; - void __iomem *image; - int last_image; /* * IORESOURCE_ROM_SHADOW set on x86, x86_64 and IA64 supports legacy @@ -117,33 +158,7 @@ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size) * size is much larger than the actual size of the ROM. * True size is important if the ROM is going to be copied. */ - image = rom; - do { - void __iomem *pds; - /* Standard PCI ROMs start out with these bytes 55 AA */ - if (readb(image) != 0x55) - break; - if (readb(image + 1) != 0xAA) - break; - /* get the PCI data structure and check its signature */ - pds = image + readw(image + 24); - if (readb(pds) != 'P') - break; - if (readb(pds + 1) != 'C') - break; - if (readb(pds + 2) != 'I') - break; - if (readb(pds + 3) != 'R') - break; - last_image = readb(pds + 21) & 0x80; - /* this length is reliable */ - image += readw(pds + 16) * 512; - } while (!last_image); - - /* never return a size larger than the PCI resource window */ - /* there are known ROMs that get the size wrong */ - *size = min((size_t)(image - rom), *size); - + *size = pci_get_rom_size(rom, *size); return rom; } diff --git a/include/linux/pci.h b/include/linux/pci.h index 086a0e5..acb9387 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -560,6 +560,7 @@ void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size); void __iomem __must_check *pci_map_rom_copy(struct pci_dev *pdev, size_t *size); void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom); void pci_remove_rom(struct pci_dev *pdev); +size_t pci_get_rom_size(void __iomem *rom, size_t size); /* Power management related routines */ int pci_save_state(struct pci_dev *dev); -- cgit v0.10.2 From c6255e9865a07d5313d6162482792f7bec9160db Mon Sep 17 00:00:00 2001 From: Christian Kandeler Date: Mon, 9 Jul 2007 16:19:11 +0200 Subject: [IA64] Stop bit for brl instruction SDM says that brl instruction must be followed by a stop bit. Fix instance in BRL_COND_FSYS_BUBBLE_DOWN where it isn't. Signed-off-by: Christian Kandeler Signed-off-by: Tony Luck diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S index 3274850..74b1ccc 100644 --- a/arch/ia64/kernel/gate.S +++ b/arch/ia64/kernel/gate.S @@ -30,6 +30,7 @@ .previous #define BRL_COND_FSYS_BUBBLE_DOWN(pr) \ [1:](pr)brl.cond.sptk 0; \ + ;; \ .xdata4 ".data.patch.brl_fsys_bubble_down", 1b-. GLOBAL_ENTRY(__kernel_syscall_via_break) -- cgit v0.10.2 From 9e121327b37b751ef66e6f57e2d02dd568955148 Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Wed, 11 Jul 2007 10:22:16 -0500 Subject: [IA64] add sn_register_pmi_handler oemcall Add wrapper function to make SN_SAL_REGISTER_PMI_HANDLER ia64_sal_oemcall. Signed-off-by: Dean Nelson Signed-off-by: Tony Luck diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h index d3566a2..676b31a 100644 --- a/include/asm-ia64/sn/sn_sal.h +++ b/include/asm-ia64/sn/sn_sal.h @@ -32,6 +32,7 @@ #define SN_SAL_NO_FAULT_ZONE_VIRTUAL 0x02000010 #define SN_SAL_NO_FAULT_ZONE_PHYSICAL 0x02000011 #define SN_SAL_PRINT_ERROR 0x02000012 +#define SN_SAL_REGISTER_PMI_HANDLER 0x02000014 #define SN_SAL_SET_ERROR_HANDLING_FEATURES 0x0200001a // reentrant #define SN_SAL_GET_FIT_COMPT 0x0200001b // reentrant #define SN_SAL_GET_SAPIC_INFO 0x0200001d @@ -680,6 +681,25 @@ sn_register_nofault_code(u64 start_addr, u64 end_addr, u64 return_addr, } /* + * Register or unregister a function to handle a PMI received by a CPU. + * Before calling the registered handler, SAL sets r1 to the value that + * was passed in as the global_pointer. + * + * If the handler pointer is NULL, then the currently registered handler + * will be unregistered. + * + * Returns 0 on success, or a negative value if an error occurred. + */ +static inline int +sn_register_pmi_handler(u64 handler, u64 global_pointer) +{ + struct ia64_sal_retval ret_stuff; + ia64_sal_oemcall(&ret_stuff, SN_SAL_REGISTER_PMI_HANDLER, handler, + global_pointer, 0, 0, 0, 0, 0); + return ret_stuff.status; +} + +/* * Change or query the coherence domain for this partition. Each cpu-based * nasid is represented by a bit in an array of 64-bit words: * 0 = not in this partition's coherency domain -- cgit v0.10.2 From 012b7105cc816fb797eb1c161cdfc0052b5c3f53 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Wed, 11 Jul 2007 11:02:15 -0600 Subject: [IA64] prevent MCA when performing MMIO mmap to PCI config space Example memory map (HP rx7640 with 'default' acpiconfig setting, VGA disabled): 0x00000000 - 0x3FFFBFFF supports only WB (cacheable) access If a user attempts to perform an MMIO mmap (using the PCIIOC_MMAP_IS_MEM ioctl) to PCI config space (like mmap'ing and accessing memory at 0xA0000), we will MCA because the kernel will attempt to use a mapping with the UC attribute. So check the memory attribute in kern_mmap and the EFI memmap. If WC is requested, and WC or UC access is supported for the region, allow it. Otherwise, use the same attribute the kernel uses. Updates documentation and test cases as well. Signed-off-by: Alex Chiang Signed-off-by: Bjorn Helgaas Signed-off-by: Tony Luck diff --git a/Documentation/ia64/aliasing-test.c b/Documentation/ia64/aliasing-test.c index d485256..773a814 100644 --- a/Documentation/ia64/aliasing-test.c +++ b/Documentation/ia64/aliasing-test.c @@ -19,6 +19,7 @@ #include #include #include +#include int sum; @@ -34,13 +35,19 @@ int map_mem(char *path, off_t offset, size_t length, int touch) return -1; } + if (fnmatch("/proc/bus/pci/*", path, 0) == 0) { + rc = ioctl(fd, PCIIOC_MMAP_IS_MEM); + if (rc == -1) + perror("PCIIOC_MMAP_IS_MEM ioctl"); + } + addr = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, offset); if (addr == MAP_FAILED) return 1; if (touch) { c = (int *) addr; - while (c < (int *) (offset + length)) + while (c < (int *) (addr + length)) sum += *c++; } @@ -54,7 +61,7 @@ int map_mem(char *path, off_t offset, size_t length, int touch) return 0; } -int scan_sysfs(char *path, char *file, off_t offset, size_t length, int touch) +int scan_tree(char *path, char *file, off_t offset, size_t length, int touch) { struct dirent **namelist; char *name, *path2; @@ -93,7 +100,7 @@ int scan_sysfs(char *path, char *file, off_t offset, size_t length, int touch) } else { r = lstat(path2, &buf); if (r == 0 && S_ISDIR(buf.st_mode)) { - rc = scan_sysfs(path2, file, offset, length, touch); + rc = scan_tree(path2, file, offset, length, touch); if (rc < 0) return rc; } @@ -238,10 +245,15 @@ int main() else fprintf(stderr, "FAIL: /dev/mem 0x0-0x100000 not accessible\n"); - scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0, 0xA0000, 1); - scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0xA0000, 0x20000, 0); - scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0xC0000, 0x40000, 1); - scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0, 1024*1024, 0); + scan_tree("/sys/class/pci_bus", "legacy_mem", 0, 0xA0000, 1); + scan_tree("/sys/class/pci_bus", "legacy_mem", 0xA0000, 0x20000, 0); + scan_tree("/sys/class/pci_bus", "legacy_mem", 0xC0000, 0x40000, 1); + scan_tree("/sys/class/pci_bus", "legacy_mem", 0, 1024*1024, 0); scan_rom("/sys/devices", "rom"); + + scan_tree("/proc/bus/pci", "??.?", 0, 0xA0000, 1); + scan_tree("/proc/bus/pci", "??.?", 0xA0000, 0x20000, 0); + scan_tree("/proc/bus/pci", "??.?", 0xC0000, 0x40000, 1); + scan_tree("/proc/bus/pci", "??.?", 0, 1024*1024, 0); } diff --git a/Documentation/ia64/aliasing.txt b/Documentation/ia64/aliasing.txt index 9a431a7..aa3e953 100644 --- a/Documentation/ia64/aliasing.txt +++ b/Documentation/ia64/aliasing.txt @@ -112,6 +112,18 @@ POTENTIAL ATTRIBUTE ALIASING CASES The /dev/mem mmap constraints apply. + mmap of /proc/bus/pci/.../??.? + + This is an MMIO mmap of PCI functions, which additionally may or + may not be requested as using the WC attribute. + + If WC is requested, and the region in kern_memmap is either WC + or UC, and the EFI memory map designates the region as WC, then + the WC mapping is allowed. + + Otherwise, the user mapping must use the same attribute as the + kernel mapping. + read/write of /dev/mem This uses copy_from_user(), which implicitly uses a kernel diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 73696b4..07d0e92 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -591,6 +591,9 @@ int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine) { + unsigned long size = vma->vm_end - vma->vm_start; + pgprot_t prot; + /* * I/O space cannot be accessed via normal processor loads and * stores on this platform. @@ -604,15 +607,24 @@ pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma, */ return -EINVAL; + if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size)) + return -EINVAL; + + prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size, + vma->vm_page_prot); + /* - * Leave vm_pgoff as-is, the PCI space address is the physical - * address on this platform. + * If the user requested WC, the kernel uses UC or WC for this region, + * and the chipset supports WC, we can use WC. Otherwise, we have to + * use the same attribute the kernel uses. */ - if (write_combine && efi_range_is_wc(vma->vm_start, - vma->vm_end - vma->vm_start)) + if (write_combine && + ((pgprot_val(prot) & _PAGE_MA_MASK) == _PAGE_MA_UC || + (pgprot_val(prot) & _PAGE_MA_MASK) == _PAGE_MA_WC) && + efi_range_is_wc(vma->vm_start, vma->vm_end - vma->vm_start)) vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); else - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_page_prot = prot; if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, vma->vm_end - vma->vm_start, vma->vm_page_prot)) -- cgit v0.10.2 From 256a7e097ba3d1179867b4c9aba1b75fb32d44f2 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Wed, 11 Jul 2007 17:26:30 +0200 Subject: [IA64] silence GCC ia64 unused variable warnings Tell GCC to stop spewing out unnecessary warnings for unused variables passed to functions as pointers for ia64 files. Signed-off-by: Jes Sorensen Signed-off-by: Tony Luck diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index af73b8d..fa40cba 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -513,7 +513,8 @@ copy_thread (int nr, unsigned long clone_flags, static void do_copy_task_regs (struct task_struct *task, struct unw_frame_info *info, void *arg) { - unsigned long mask, sp, nat_bits = 0, ip, ar_rnat, urbs_end, cfm; + unsigned long mask, sp, nat_bits = 0, ar_rnat, urbs_end, cfm; + unsigned long uninitialized_var(ip); /* GCC be quiet */ elf_greg_t *dst = arg; struct pt_regs *pt; char nat; diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index fa4e6d4..1682fc6 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -175,7 +175,7 @@ EXPORT_SYMBOL(flush_tlb_range); void __devinit ia64_tlb_init (void) { - ia64_ptce_info_t ptce_info; + ia64_ptce_info_t uninitialized_var(ptce_info); /* GCC be quiet */ unsigned long tr_pgbits; long status; diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c index 493380b..5a289e4 100644 --- a/arch/ia64/sn/kernel/tiocx.c +++ b/arch/ia64/sn/kernel/tiocx.c @@ -369,7 +369,7 @@ static void tio_corelet_reset(nasid_t nasid, int corelet) static int is_fpga_tio(int nasid, int *bt) { - u16 ioboard_type; + u16 uninitialized_var(ioboard_type); /* GCC be quiet */ s64 rc; rc = ia64_sn_sysctl_ioboard_get(nasid, &ioboard_type); diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index b42bfca..42485ad 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -80,7 +80,7 @@ static int sal_pcibr_error_interrupt(struct pcibus_info *soft) u16 sn_ioboard_to_pci_bus(struct pci_bus *pci_bus) { s64 rc; - u16 ioboard; + u16 uninitialized_var(ioboard); /* GCC be quiet */ nasid_t nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base); rc = ia64_sn_sysctl_ioboard_get(nasid, &ioboard); -- cgit v0.10.2 From 1612b18ccb2318563ba51268289dc3271a6052f7 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Fri, 18 May 2007 17:17:17 -0500 Subject: [IA64] Support multiple CPUs going through OS_MCA Linux does not gracefully deal with multiple processors going through OS_MCA aa part of the same MCA event. The first cpu into OS_MCA grabs the ia64_mca_serialize lock. Subsequent cpus wait for that lock, preventing them from reporting in as rendezvoused. The first cpu waits 5 seconds then complains that all the cpus have not rendezvoused. The first cpu then handles its MCA and frees up all the rendezvoused cpus and releases the ia64_mca_serialize lock. One of the subsequent cpus going thought OS_MCA then gets the ia64_mca_serialize lock, waits another 5 seconds and then complains that none of the other cpus have rendezvoused. This patch allows multiple CPUs to gracefully go through OS_MCA. The first CPU into ia64_mca_handler() grabs a mca_count lock. Subsequent CPUs into ia64_mca_handler() are added to a list of cpus that need to go through OS_MCA (a bit set in mca_cpu), and report in as rendezvoused, and but spin waiting their turn. The first CPU sees everyone rendezvous, handles his MCA, wakes up one of the other CPUs waiting to process their MCA (by clearing one mca_cpu bit), and then waits for the other cpus to complete their MCA handling. The next CPU handles his MCA and the process repeats until all the CPUs have handled their MCA. When the last CPU has handled it's MCA, it sets monarch_cpu to -1, releasing all the CPUs. In testing this works more reliably and faster. Thanks to Keith Owens for suggesting numerous improvements to this code. Signed-off-by: Russ Anderson Signed-off-by: Tony Luck diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 1ead5ea..4b5daa3 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -57,6 +57,9 @@ * * 2006-09-15 Hidetoshi Seto * Add printing support for MCA/INIT. + * + * 2007-04-27 Russ Anderson + * Support multiple cpus going through OS_MCA in the same event. */ #include #include @@ -96,7 +99,6 @@ #endif /* Used by mca_asm.S */ -u32 ia64_mca_serialize; DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */ DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */ DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */ @@ -963,11 +965,12 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, goto no_mod; } + if (r13 != sos->prev_IA64_KR_CURRENT) { + msg = "inconsistent previous current and r13"; + goto no_mod; + } + if (!mca_recover_range(ms->pmsa_iip)) { - if (r13 != sos->prev_IA64_KR_CURRENT) { - msg = "inconsistent previous current and r13"; - goto no_mod; - } if ((r12 - r13) >= KERNEL_STACK_SIZE) { msg = "inconsistent r12 and r13"; goto no_mod; @@ -1187,6 +1190,13 @@ all_in: * further MCA logging is enabled by clearing logs. * Monarch also has the duty of sending wakeup-IPIs to pull the * slave processors out of rendezvous spinloop. + * + * If multiple processors call into OS_MCA, the first will become + * the monarch. Subsequent cpus will be recorded in the mca_cpu + * bitmask. After the first monarch has processed its MCA, it + * will wake up the next cpu in the mca_cpu bitmask and then go + * into the rendezvous loop. When all processors have serviced + * their MCA, the last monarch frees up the rest of the processors. */ void ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, @@ -1196,16 +1206,32 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, struct task_struct *previous_current; struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu }; + static atomic_t mca_count; + static cpumask_t mca_cpu; + if (atomic_add_return(1, &mca_count) == 1) { + monarch_cpu = cpu; + sos->monarch = 1; + } else { + cpu_set(cpu, mca_cpu); + sos->monarch = 0; + } mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d " "monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); - monarch_cpu = cpu; + if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, (long)&nd, 0, 0) == NOTIFY_STOP) ia64_mca_spin(__FUNCTION__); - ia64_wait_for_slaves(cpu, "MCA"); + if (sos->monarch) { + ia64_wait_for_slaves(cpu, "MCA"); + } else { + ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA; + while (cpu_isset(cpu, mca_cpu)) + cpu_relax(); /* spin until monarch wakes us */ + ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; + } /* Wakeup all the processors which are spinning in the rendezvous loop. * They will leave SAL, then spin in the OS with interrupts disabled @@ -1244,6 +1270,26 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, == NOTIFY_STOP) ia64_mca_spin(__FUNCTION__); + + if (atomic_dec_return(&mca_count) > 0) { + int i; + + /* wake up the next monarch cpu, + * and put this cpu in the rendez loop. + */ + ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA; + for_each_online_cpu(i) { + if (cpu_isset(i, mca_cpu)) { + monarch_cpu = i; + cpu_clear(i, mca_cpu); /* wake next cpu */ + while (monarch_cpu != -1) + cpu_relax(); /* spin until last cpu leaves */ + ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; + set_curr_task(cpu, previous_current); + return; + } + } + } set_curr_task(cpu, previous_current); monarch_cpu = -1; } diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index 8c9c26a..0f5965f 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -133,14 +133,6 @@ ia64_do_tlb_purge: //StartMain//////////////////////////////////////////////////////////////////// ia64_os_mca_dispatch: - // Serialize all MCA processing - mov r3=1;; - LOAD_PHYSICAL(p0,r2,ia64_mca_serialize);; -ia64_os_mca_spin: - xchg4 r4=[r2],r3;; - cmp.ne p6,p0=r4,r0 -(p6) br ia64_os_mca_spin - mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack LOAD_PHYSICAL(p0,r2,1f) // return address mov r19=1 // All MCA events are treated as monarch (for now) @@ -291,10 +283,6 @@ END(ia64_os_mca_virtual_begin) mov b0=r12 // SAL_CHECK return address - // release lock - LOAD_PHYSICAL(p0,r3,ia64_mca_serialize);; - st4.rel [r3]=r0 - br b0 //EndMain////////////////////////////////////////////////////////////////////// diff --git a/include/asm-ia64/mca.h b/include/asm-ia64/mca.h index 41098f4..edd5d01 100644 --- a/include/asm-ia64/mca.h +++ b/include/asm-ia64/mca.h @@ -48,6 +48,7 @@ enum { IA64_MCA_RENDEZ_CHECKIN_NOTDONE = 0x0, IA64_MCA_RENDEZ_CHECKIN_DONE = 0x1, IA64_MCA_RENDEZ_CHECKIN_INIT = 0x2, + IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA = 0x3, }; /* Information maintained by the MC infrastructure */ -- cgit v0.10.2