From d7c6797fbc2c2efa7573817685d2a76fd274d2de Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 19 Mar 2013 10:36:13 +0100 Subject: tiocx: check retval from bus_register() Properly check return value from bus_register() and propagate it out of tiocx_init() in case of failure. Reported-by: Fengguang Wu Signed-off-by: Jiri Kosina Signed-off-by: Tony Luck diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c index 14c1711..e35f648 100644 --- a/arch/ia64/sn/kernel/tiocx.c +++ b/arch/ia64/sn/kernel/tiocx.c @@ -490,11 +490,14 @@ static int __init tiocx_init(void) { cnodeid_t cnodeid; int found_tiocx_device = 0; + int err; if (!ia64_platform_is("sn2")) return 0; - bus_register(&tiocx_bus_type); + err = bus_register(&tiocx_bus_type); + if (err) + return err; for (cnodeid = 0; cnodeid < num_cnodes; cnodeid++) { nasid_t nasid; -- cgit v0.10.2 From deb60015096102f9842b631dcad98a05001268e9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 18 Mar 2013 17:03:03 -0700 Subject: Fix broken fsys_getppid() In particular fsys_getppid always returns the ppid in the initial pid namespace so it does not work for a process in a pid namespace. Fix from Eric Biederman just removes the fast system call path. While it is a little bit sad to see another one of these bite the dust ... I can't imagine that getppid() is really on any real applications critical path. Signed-off-by: Tony Luck diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index c4cd45d..abc6dee 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -90,53 +90,6 @@ ENTRY(fsys_getpid) FSYS_RETURN END(fsys_getpid) -ENTRY(fsys_getppid) - .prologue - .altrp b6 - .body - add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16 - ;; - ld8 r17=[r17] // r17 = current->group_leader - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 - ;; - - ld4 r9=[r9] - add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = ¤t->group_leader->real_parent - ;; - and r9=TIF_ALLWORK_MASK,r9 - -1: ld8 r18=[r17] // r18 = current->group_leader->real_parent - ;; - cmp.ne p8,p0=0,r9 - add r8=IA64_TASK_TGID_OFFSET,r18 // r8 = ¤t->group_leader->real_parent->tgid - ;; - - /* - * The .acq is needed to ensure that the read of tgid has returned its data before - * we re-check "real_parent". - */ - ld4.acq r8=[r8] // r8 = current->group_leader->real_parent->tgid -#ifdef CONFIG_SMP - /* - * Re-read current->group_leader->real_parent. - */ - ld8 r19=[r17] // r19 = current->group_leader->real_parent -(p8) br.spnt.many fsys_fallback_syscall - ;; - cmp.ne p6,p0=r18,r19 // did real_parent change? - mov r19=0 // i must not leak kernel bits... -(p6) br.cond.spnt.few 1b // yes -> redo the read of tgid and the check - ;; - mov r17=0 // i must not leak kernel bits... - mov r18=0 // i must not leak kernel bits... -#else - mov r17=0 // i must not leak kernel bits... - mov r18=0 // i must not leak kernel bits... - mov r19=0 // i must not leak kernel bits... -#endif - FSYS_RETURN -END(fsys_getppid) - ENTRY(fsys_set_tid_address) .prologue .altrp b6 @@ -614,7 +567,7 @@ paravirt_fsyscall_table: data8 0 // chown data8 0 // lseek // 1040 data8 fsys_getpid // getpid - data8 fsys_getppid // getppid + data8 0 // getppid data8 0 // mount data8 0 // umount data8 0 // setuid // 1045 -- cgit v0.10.2 From a4279e6202bbd08ac6038234571ac639c98879cf Mon Sep 17 00:00:00 2001 From: "Li, Zhen-Hua" Date: Mon, 18 Mar 2013 10:45:43 +0800 Subject: Add WB/UC check for early_ioremap On ia64 system, the function early_ioremap returned an uncached memory reference without checking whether this was consistent with existing mappings. This causes efi error and the kernel failed during boot. Add a check to test whether memory has EFI_MEMORY_WB set. Use the function kern_mem_attribute() in early_iomap() function to provide appropriate cacheable or uncacheable mapped address. See the document Documentation/ia64/aliasing.txt for more details. Signed-off-by: Li, Zhen-Hua Signed-off-by: Tony Luck diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c index 3dccdd8..43964cd 100644 --- a/arch/ia64/mm/ioremap.c +++ b/arch/ia64/mm/ioremap.c @@ -16,7 +16,7 @@ #include static inline void __iomem * -__ioremap (unsigned long phys_addr) +__ioremap_uc(unsigned long phys_addr) { return (void __iomem *) (__IA64_UNCACHED_OFFSET | phys_addr); } @@ -24,7 +24,11 @@ __ioremap (unsigned long phys_addr) void __iomem * early_ioremap (unsigned long phys_addr, unsigned long size) { - return __ioremap(phys_addr); + u64 attr; + attr = kern_mem_attribute(phys_addr, size); + if (attr & EFI_MEMORY_WB) + return (void __iomem *) phys_to_virt(phys_addr); + return __ioremap_uc(phys_addr); } void __iomem * @@ -47,7 +51,7 @@ ioremap (unsigned long phys_addr, unsigned long size) if (attr & EFI_MEMORY_WB) return (void __iomem *) phys_to_virt(phys_addr); else if (attr & EFI_MEMORY_UC) - return __ioremap(phys_addr); + return __ioremap_uc(phys_addr); /* * Some chipsets don't support UC access to memory. If @@ -93,7 +97,7 @@ ioremap (unsigned long phys_addr, unsigned long size) return (void __iomem *) (offset + (char __iomem *)addr); } - return __ioremap(phys_addr); + return __ioremap_uc(phys_addr); } EXPORT_SYMBOL(ioremap); @@ -103,7 +107,7 @@ ioremap_nocache (unsigned long phys_addr, unsigned long size) if (kern_mem_attribute(phys_addr, size) & EFI_MEMORY_WB) return NULL; - return __ioremap(phys_addr); + return __ioremap_uc(phys_addr); } EXPORT_SYMBOL(ioremap_nocache); -- cgit v0.10.2 From c74edea33c57ae98121a1fdfa5512fbcd9c74875 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Fri, 8 Mar 2013 12:32:52 +0800 Subject: iosapic: fix a minor typo in comments describeinterrupts -> describe interrupts Signed-off-by: Hanjun Guo Signed-off-by: Tony Luck diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index ee33c3a..a6e2f75 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -76,7 +76,7 @@ * PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ * * Note: The term "IRQ" is loosely used everywhere in Linux kernel to - * describeinterrupts. Now we use "IRQ" only for Linux IRQ's. ISA IRQ + * describe interrupts. Now we use "IRQ" only for Linux IRQ's. ISA IRQ * (isa_irq) is the only exception in this source code. */ -- cgit v0.10.2 From ffa9095532d758cab6ecd6f161b08e338e7f64cb Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Fri, 8 Mar 2013 12:33:35 +0800 Subject: Fix kexec oops when iosapic was removed Iosapic hotplug was supported in IA64 code, but will lead to kexec oops when iosapic was removed. here is the code logic: iosapic_remove iosapic_free memset(&iosapic_lists[index], 0, sizeof(iosapic_lists[0])) iosapic_lists[index].addr was set to 0; and then kexec a new kernel kexec_disable_iosapic iosapic_write(rte->iosapic,..) __iosapic_write(iosapic->addr, reg, val); addr was set to 0 when iosapic_remove, and oops happened The call trace is: Starting new kernel kexec[11336]: Oops 8804682956800 [1] Modules linked in: raw(N) ipv6(N) acpi_cpufreq(N) binfmt_misc(N) fuse(N) nls_iso 8859_1(N) loop(N) ipmi_si(N) ipmi_devintf(N) ipmi_msghandler(N) mca_ereport(N) s csi_ereport(N) nic_ereport(N) pcie_ereport(N) err_transport(N) nvlist(PN) dm_mod (N) tpm_tis(N) tpm(N) ppdev(N) tpm_bios(N) serio_raw(N) i2c_i801(N) iTCO_wdt(N) i2c_core(N) iTCO_vendor_support(N) sg(N) ioatdma(N) igb(N) mptctl(N) dca(N) parp ort_pc(N) parport(N) container(N) button(N) usbhid(N) hid(N) uhci_hcd(N) ehci_hc d(N) usbcore(N) sd_mod(N) crc_t10dif(N) ext3(N) mbcache(N) jbd(N) fan(N) process or(N) ide_pci_generic(N) ide_core(N) ata_piix(N) libata(N) mptsas(N) mptscsih(N) mptbase(N) scsi_transport_sas(N) scsi_mod(N) thermal(N) thermal_sys(N) hwmon(N) Supported: Yes, External Pid: 11336, CPU 0, comm: kexec psr : 0000101009522030 ifs : 8000000000000791 ip : [] Tain ted: P N (2.6.32.12_RAS_V1R3C00B011) ip is at kexec_disable_iosapic+0x120/0x1e0 unat: 0000000000000000 pfs : 0000000000000791 rsc : 0000000000000003 rnat: 0000000000000000 bsps: 0000000000000000 pr : 65519aa6a555a659 ldrs: 0000000000000000 ccv : 00000000ea3cf51e fpsr: 0009804c8a70033f csd : 0000000000000000 ssd : 0000000000000000 b0 : a00000010004c150 b6 : a000000100012620 b7 : a00000010000cda0 f6 : 000000000000000000000 f7 : 1003e0000000002000000 f8 : 1003e0000000050000003 f9 : 1003e0000028fb97183cd f10 : 1003ee9f380df3c548b67 f11 : 1003e00000000000000cc r1 : a0000001016cf660 r2 : 0000000000000000 r3 : 0000000000000000 r8 : 0000001009526030 r9 : a000000100012620 r10 : e00000010053f600 r11 : c0000000fec34040 r12 : e00000078f76fd30 r13 : e00000078f760000 r14 : 0000000000000000 r15 : 0000000000000000 r16 : 0000000000000000 r17 : 0000000000000000 r18 : 0000000000007fff r19 : 0000000000000000 r20 : 0000000000000000 r21 : e00000010053f590 r22 : a000000100cf0000 r23 : 0000000000000036 r24 : e0000007002f8a84 r25 : 0000000000000022 r26 : e0000007002f8a88 r27 : 0000000000000020 r28 : 0000000000000002 r29 : a0000001012c8c60 r30 : 0000000000000000 r31 : 0000000000322e49 Call Trace: [] show_stack+0x80/0xa0 sp=e00000078f76f8f0 bsp=e00000078f761380 [] show_regs+0x640/0x920 sp=e00000078f76fac0 bsp=e00000078f761328 [] die+0x190/0x2e0 sp=e00000078f76fad0 bsp=e00000078f7612e8 [] ia64_do_page_fault+0x840/0xb20 sp=e00000078f76fad0 bsp=e00000078f761288 [] ia64_native_leave_kernel+0x0/0x270 sp=e00000078f76fb60 bsp=e00000078f761288 [] kexec_disable_iosapic+0x120/0x1e0 sp=e00000078f76fd30 bsp=e00000078f761200 [] machine_shutdown+0x110/0x140 sp=e00000078f76fd30 bsp=e00000078f7611c8 [] kernel_kexec+0xd0/0x120 sp=e00000078f76fd30 bsp=e00000078f7611a0 [] sys_reboot+0x480/0x4e0 sp=e00000078f76fd30 bsp=e00000078f761128 [] ia64_ret_from_syscall+0x0/0x20 sp=e00000078f76fe30 bsp=e00000078f761120 Kernel panic - not syncing: Fatal exception With Tony and Toshi's advice, the patch removes the "rte" from rte_list when the iosapic was removed. Signed-off-by: Hanjun Guo Signed-off-by: Jianguo Wu Acked-by: Toshi Kani Signed-off-by: Tony Luck diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index a6e2f75..19f107b 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -1010,6 +1010,26 @@ iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver) return 0; } +static int +iosapic_delete_rte(unsigned int irq, unsigned int gsi) +{ + struct iosapic_rte_info *rte, *temp; + + list_for_each_entry_safe(rte, temp, &iosapic_intr_info[irq].rtes, + rte_list) { + if (rte->iosapic->gsi_base + rte->rte_index == gsi) { + if (rte->refcnt) + return -EBUSY; + + list_del(&rte->rte_list); + kfree(rte); + return 0; + } + } + + return -EINVAL; +} + int iosapic_init(unsigned long phys_addr, unsigned int gsi_base) { int num_rte, err, index; @@ -1069,7 +1089,7 @@ int iosapic_init(unsigned long phys_addr, unsigned int gsi_base) int iosapic_remove(unsigned int gsi_base) { - int index, err = 0; + int i, irq, index, err = 0; unsigned long flags; spin_lock_irqsave(&iosapic_lock, flags); @@ -1087,6 +1107,16 @@ int iosapic_remove(unsigned int gsi_base) goto out; } + for (i = gsi_base; i < gsi_base + iosapic_lists[index].num_rte; i++) { + irq = __gsi_to_irq(i); + if (irq < 0) + continue; + + err = iosapic_delete_rte(irq, i); + if (err) + goto out; + } + iounmap(iosapic_lists[index].addr); iosapic_free(index); out: -- cgit v0.10.2 From 7c13e0d1e8f4eeb3537d9a0b6b70e464c5736854 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Tue, 12 Mar 2013 12:47:08 +0800 Subject: Remove cast for kmalloc return value remove cast for kmalloc return value. Signed-off-by: Zhang Yanfei Signed-off-by: Tony Luck diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index 9392e02..94f8bf7 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -349,7 +349,7 @@ init_record_index_pools(void) /* - 3 - */ slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1; - slidx_pool.buffer = (slidx_list_t *) + slidx_pool.buffer = kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL); return slidx_pool.buffer ? 0 : -ENOMEM; -- cgit v0.10.2 From 136f39ddc53db3bcee2befbe323a56d4fbf06da8 Mon Sep 17 00:00:00 2001 From: Stephan Schreiber Date: Tue, 19 Mar 2013 15:22:27 -0700 Subject: Wrong asm register contraints in the futex implementation The Linux Kernel contains some inline assembly source code which has wrong asm register constraints in arch/ia64/include/asm/futex.h. I observed this on Kernel 3.2.23 but it is also true on the most recent Kernel 3.9-rc1. File arch/ia64/include/asm/futex.h: static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; { register unsigned long r8 __asm ("r8"); unsigned long prev; __asm__ __volatile__( " mf;; \n" " mov %0=r0 \n" " mov ar.ccv=%4;; \n" "[1:] cmpxchg4.acq %1=[%2],%3,ar.ccv \n" " .xdata4 \"__ex_table\", 1b-., 2f-. \n" "[2:]" : "=r" (r8), "=r" (prev) : "r" (uaddr), "r" (newval), "rO" ((long) (unsigned) oldval) : "memory"); *uval = prev; return r8; } } The list of output registers is : "=r" (r8), "=r" (prev) The constraint "=r" means that the GCC has to maintain that these vars are in registers and contain valid info when the program flow leaves the assembly block (output registers). But "=r" also means that GCC can put them in registers that are used as input registers. Input registers are uaddr, newval, oldval on the example. The second assembly instruction " mov %0=r0 \n" is the first one which writes to a register; it sets %0 to 0. %0 means the first register operand; it is r8 here. (The r0 is read-only and always 0 on the Itanium; it can be used if an immediate zero value is needed.) This instruction might overwrite one of the other registers which are still needed. Whether it really happens depends on how GCC decides what registers it uses and how it optimizes the code. The objdump utility can give us disassembly. The futex_atomic_cmpxchg_inatomic() function is inline, so we have to look for a module that uses the funtion. This is the cmpxchg_futex_value_locked() function in kernel/futex.c: static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval) { int ret; pagefault_disable(); ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval); pagefault_enable(); return ret; } Now the disassembly. At first from the Kernel package 3.2.23 which has been compiled with GCC 4.4, remeber this Kernel seemed to work: objdump -d linux-3.2.23/debian/build/build_ia64_none_mckinley/kernel/futex.o 0000000000000230 : 230: 0b 18 80 1b 18 21 [MMI] adds r3=3168,r13;; 236: 80 40 0d 00 42 00 adds r8=40,r3 23c: 00 00 04 00 nop.i 0x0;; 240: 0b 50 00 10 10 10 [MMI] ld4 r10=[r8];; 246: 90 08 28 00 42 00 adds r9=1,r10 24c: 00 00 04 00 nop.i 0x0;; 250: 09 00 00 00 01 00 [MMI] nop.m 0x0 256: 00 48 20 20 23 00 st4 [r8]=r9 25c: 00 00 04 00 nop.i 0x0;; 260: 08 10 80 06 00 21 [MMI] adds r2=32,r3 266: 00 00 00 02 00 00 nop.m 0x0 26c: 02 08 f1 52 extr.u r16=r33,0,61 270: 05 40 88 00 08 e0 [MLX] addp4 r8=r34,r0 276: ff ff 0f 00 00 e0 movl r15=0xfffffffbfff;; 27c: f1 f7 ff 65 280: 09 70 00 04 18 10 [MMI] ld8 r14=[r2] 286: 00 00 00 02 00 c0 nop.m 0x0 28c: f0 80 1c d0 cmp.ltu p6,p7=r15,r16;; 290: 08 40 fc 1d 09 3b [MMI] cmp.eq p8,p9=-1,r14 296: 00 00 00 02 00 40 nop.m 0x0 29c: e1 08 2d d0 cmp.ltu p10,p11=r14,r33 2a0: 56 01 10 00 40 10 [BBB] (p10) br.cond.spnt.few 2e0 2a6: 02 08 00 80 21 03 (p08) br.cond.dpnt.few 2b0 2ac: 40 00 00 41 (p06) br.cond.spnt.few 2e0 2b0: 0a 00 00 00 22 00 [MMI] mf;; 2b6: 80 00 00 00 42 00 mov r8=r0 2bc: 00 00 04 00 nop.i 0x0 2c0: 0b 00 20 40 2a 04 [MMI] mov.m ar.ccv=r8;; 2c6: 10 1a 85 22 20 00 cmpxchg4.acq r33=[r33],r35,ar.ccv 2cc: 00 00 04 00 nop.i 0x0;; 2d0: 10 00 84 40 90 11 [MIB] st4 [r32]=r33 2d6: 00 00 00 02 00 00 nop.i 0x0 2dc: 20 00 00 40 br.few 2f0 2e0: 09 40 c8 f9 ff 27 [MMI] mov r8=-14 2e6: 00 00 00 02 00 00 nop.m 0x0 2ec: 00 00 04 00 nop.i 0x0;; 2f0: 0b 58 20 1a 19 21 [MMI] adds r11=3208,r13;; 2f6: 20 01 2c 20 20 00 ld4 r18=[r11] 2fc: 00 00 04 00 nop.i 0x0;; 300: 0b 88 fc 25 3f 23 [MMI] adds r17=-1,r18;; 306: 00 88 2c 20 23 00 st4 [r11]=r17 30c: 00 00 04 00 nop.i 0x0;; 310: 11 00 00 00 01 00 [MIB] nop.m 0x0 316: 00 00 00 02 00 80 nop.i 0x0 31c: 08 00 84 00 br.ret.sptk.many b0;; The lines 2b0: 0a 00 00 00 22 00 [MMI] mf;; 2b6: 80 00 00 00 42 00 mov r8=r0 2bc: 00 00 04 00 nop.i 0x0 2c0: 0b 00 20 40 2a 04 [MMI] mov.m ar.ccv=r8;; 2c6: 10 1a 85 22 20 00 cmpxchg4.acq r33=[r33],r35,ar.ccv 2cc: 00 00 04 00 nop.i 0x0;; are the instructions of the assembly block. The line 2b6: 80 00 00 00 42 00 mov r8=r0 sets the r8 register to 0 and after that 2c0: 0b 00 20 40 2a 04 [MMI] mov.m ar.ccv=r8;; prepares the 'oldvalue' for the cmpxchg but it takes it from r8. This is wrong. What happened here is what I explained above: An input register is overwritten which is still needed. The register operand constraints in futex.h are wrong. (The problem doesn't occur when the Kernel is compiled with GCC 4.6.) The attached patch fixes the register operand constraints in futex.h. The code after patching of it: static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; { register unsigned long r8 __asm ("r8") = 0; unsigned long prev; __asm__ __volatile__( " mf;; \n" " mov ar.ccv=%4;; \n" "[1:] cmpxchg4.acq %1=[%2],%3,ar.ccv \n" " .xdata4 \"__ex_table\", 1b-., 2f-. \n" "[2:]" : "+r" (r8), "=&r" (prev) : "r" (uaddr), "r" (newval), "rO" ((long) (unsigned) oldval) : "memory"); *uval = prev; return r8; } } I also initialized the 'r8' var with the C programming language. The _asm qualifier on the definition of the 'r8' var forces GCC to use the r8 processor register for it. I don't believe that we should use inline assembly for zeroing out a local variable. The constraint is "+r" (r8) what means that it is both an input register and an output register. Note that the page fault handler will modify the r8 register which will be the return value of the function. The real fix is "=&r" (prev) The & means that GCC must not use any of the input registers to place this output register in. Patched the Kernel 3.2.23 and compiled it with GCC4.4: 0000000000000230 : 230: 0b 18 80 1b 18 21 [MMI] adds r3=3168,r13;; 236: 80 40 0d 00 42 00 adds r8=40,r3 23c: 00 00 04 00 nop.i 0x0;; 240: 0b 50 00 10 10 10 [MMI] ld4 r10=[r8];; 246: 90 08 28 00 42 00 adds r9=1,r10 24c: 00 00 04 00 nop.i 0x0;; 250: 09 00 00 00 01 00 [MMI] nop.m 0x0 256: 00 48 20 20 23 00 st4 [r8]=r9 25c: 00 00 04 00 nop.i 0x0;; 260: 08 10 80 06 00 21 [MMI] adds r2=32,r3 266: 20 12 01 10 40 00 addp4 r34=r34,r0 26c: 02 08 f1 52 extr.u r16=r33,0,61 270: 05 40 00 00 00 e1 [MLX] mov r8=r0 276: ff ff 0f 00 00 e0 movl r15=0xfffffffbfff;; 27c: f1 f7 ff 65 280: 09 70 00 04 18 10 [MMI] ld8 r14=[r2] 286: 00 00 00 02 00 c0 nop.m 0x0 28c: f0 80 1c d0 cmp.ltu p6,p7=r15,r16;; 290: 08 40 fc 1d 09 3b [MMI] cmp.eq p8,p9=-1,r14 296: 00 00 00 02 00 40 nop.m 0x0 29c: e1 08 2d d0 cmp.ltu p10,p11=r14,r33 2a0: 56 01 10 00 40 10 [BBB] (p10) br.cond.spnt.few 2e0 2a6: 02 08 00 80 21 03 (p08) br.cond.dpnt.few 2b0 2ac: 40 00 00 41 (p06) br.cond.spnt.few 2e0 2b0: 0b 00 00 00 22 00 [MMI] mf;; 2b6: 00 10 81 54 08 00 mov.m ar.ccv=r34 2bc: 00 00 04 00 nop.i 0x0;; 2c0: 09 58 8c 42 11 10 [MMI] cmpxchg4.acq r11=[r33],r35,ar.ccv 2c6: 00 00 00 02 00 00 nop.m 0x0 2cc: 00 00 04 00 nop.i 0x0;; 2d0: 10 00 2c 40 90 11 [MIB] st4 [r32]=r11 2d6: 00 00 00 02 00 00 nop.i 0x0 2dc: 20 00 00 40 br.few 2f0 2e0: 09 40 c8 f9 ff 27 [MMI] mov r8=-14 2e6: 00 00 00 02 00 00 nop.m 0x0 2ec: 00 00 04 00 nop.i 0x0;; 2f0: 0b 88 20 1a 19 21 [MMI] adds r17=3208,r13;; 2f6: 30 01 44 20 20 00 ld4 r19=[r17] 2fc: 00 00 04 00 nop.i 0x0;; 300: 0b 90 fc 27 3f 23 [MMI] adds r18=-1,r19;; 306: 00 90 44 20 23 00 st4 [r17]=r18 30c: 00 00 04 00 nop.i 0x0;; 310: 11 00 00 00 01 00 [MIB] nop.m 0x0 316: 00 00 00 02 00 80 nop.i 0x0 31c: 08 00 84 00 br.ret.sptk.many b0;; Much better. There is a 270: 05 40 00 00 00 e1 [MLX] mov r8=r0 which was generated by C code r8 = 0. Below 2b6: 00 10 81 54 08 00 mov.m ar.ccv=r34 what means that oldval is no longer overwritten. This is Debian bug#702641 (http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=702641). The patch is applicable on Kernel 3.9-rc1, 3.2.23 and many other versions. Signed-off-by: Stephan Schreiber Cc: stable@vger.kernel.org Signed-off-by: Tony Luck diff --git a/arch/ia64/include/asm/futex.h b/arch/ia64/include/asm/futex.h index d2bf1fd..76acbcd 100644 --- a/arch/ia64/include/asm/futex.h +++ b/arch/ia64/include/asm/futex.h @@ -106,16 +106,15 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; { - register unsigned long r8 __asm ("r8"); + register unsigned long r8 __asm ("r8") = 0; unsigned long prev; __asm__ __volatile__( " mf;; \n" - " mov %0=r0 \n" " mov ar.ccv=%4;; \n" "[1:] cmpxchg4.acq %1=[%2],%3,ar.ccv \n" " .xdata4 \"__ex_table\", 1b-., 2f-. \n" "[2:]" - : "=r" (r8), "=r" (prev) + : "+r" (r8), "=&r" (prev) : "r" (uaddr), "r" (newval), "rO" ((long) (unsigned) oldval) : "memory"); -- cgit v0.10.2 From de53e9caa4c6149ef4a78c2f83d7f5b655848767 Mon Sep 17 00:00:00 2001 From: Stephan Schreiber Date: Tue, 19 Mar 2013 15:27:12 -0700 Subject: Wrong asm register contraints in the kvm implementation The Linux Kernel contains some inline assembly source code which has wrong asm register constraints in arch/ia64/kvm/vtlb.c. I observed this on Kernel 3.2.35 but it is also true on the most recent Kernel 3.9-rc1. File arch/ia64/kvm/vtlb.c: u64 guest_vhpt_lookup(u64 iha, u64 *pte) { u64 ret; struct thash_data *data; data = __vtr_lookup(current_vcpu, iha, D_TLB); if (data != NULL) thash_vhpt_insert(current_vcpu, data->page_flags, data->itir, iha, D_TLB); asm volatile ( "rsm psr.ic|psr.i;;" "srlz.d;;" "ld8.s r9=[%1];;" "tnat.nz p6,p7=r9;;" "(p6) mov %0=1;" "(p6) mov r9=r0;" "(p7) extr.u r9=r9,0,53;;" "(p7) mov %0=r0;" "(p7) st8 [%2]=r9;;" "ssm psr.ic;;" "srlz.d;;" "ssm psr.i;;" "srlz.d;;" : "=r"(ret) : "r"(iha), "r"(pte):"memory"); return ret; } The list of output registers is : "=r"(ret) : "r"(iha), "r"(pte):"memory"); The constraint "=r" means that the GCC has to maintain that these vars are in registers and contain valid info when the program flow leaves the assembly block (output registers). But "=r" also means that GCC can put them in registers that are used as input registers. Input registers are iha, pte on the example. If the predicate p7 is true, the 8th assembly instruction "(p7) mov %0=r0;" is the first one which writes to a register which is maintained by the register constraints; it sets %0. %0 means the first register operand; it is ret here. This instruction might overwrite the %2 register (pte) which is needed by the next instruction: "(p7) st8 [%2]=r9;;" Whether it really happens depends on how GCC decides what registers it uses and how it optimizes the code. The attached patch fixes the register operand constraints in arch/ia64/kvm/vtlb.c. The register constraints should be : "=&r"(ret) : "r"(iha), "r"(pte):"memory"); The & means that GCC must not use any of the input registers to place this output register in. This is Debian bug#702639 (http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=702639). The patch is applicable on Kernel 3.9-rc1, 3.2.35 and many other versions. Signed-off-by: Stephan Schreiber Cc: stable@vger.kernel.org Signed-off-by: Tony Luck diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c index 4332f7e..a7869f8 100644 --- a/arch/ia64/kvm/vtlb.c +++ b/arch/ia64/kvm/vtlb.c @@ -256,7 +256,7 @@ u64 guest_vhpt_lookup(u64 iha, u64 *pte) "srlz.d;;" "ssm psr.i;;" "srlz.d;;" - : "=r"(ret) : "r"(iha), "r"(pte):"memory"); + : "=&r"(ret) : "r"(iha), "r"(pte) : "memory"); return ret; } -- cgit v0.10.2 From 96edc754aa714e51d2044af91b96cc7420c5cb01 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Tue, 5 Mar 2013 14:59:23 +0100 Subject: Change "select DMAR" to "select INTEL_IOMMU" Commit d3f138106b ("iommu: Rename the DMAR and INTR_REMAP config options") changed all references to DMAR in Kconfig files to INTEL_IOMMU (and, likewise, changed the references to CONFIG_DMAR everywhere else to CONFIG_INTEL_IOMMU). That commit missed one "select DMAR" statement in ia64's Kconfig file. Change that one too. Signed-off-by: Paul Bolle Signed-off-by: Tony Luck diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 9a02f71..e7e55a0 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -187,7 +187,7 @@ config IA64_DIG config IA64_DIG_VTD bool "DIG+Intel+IOMMU" - select DMAR + select INTEL_IOMMU select PCI_MSI config IA64_HP_ZX1 -- cgit v0.10.2 From d303e9e98fce56cdb3c6f2ac92f626fc2bd51c77 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 20 Mar 2013 10:30:15 -0700 Subject: Fix initialization of CMCI/CMCP interrupts Back 2010 during a revamp of the irq code some initializations were moved from ia64_mca_init() to ia64_mca_late_init() in commit c75f2aa13f5b268aba369b5dc566088b5194377c Cannot use register_percpu_irq() from ia64_mca_init() But this was hideously wrong. First of all these initializations are now down far too late. Specifically after all the other cpus have been brought up and initialized their own CMC vectors from smp_callin(). Also ia64_mca_late_init() may be called from any cpu so the line: ia64_mca_cmc_vector_setup(); /* Setup vector on BSP */ is generally not executed on the BSP, and so the CMC vector isn't setup at all on that processor. Make use of the arch_early_irq_init() hook to get this code executed at just the right moment: not too early, not too late. Reported-by: Fred Hartnett Tested-by: Fred Hartnett Cc: stable@kernel.org # v2.6.37+ Signed-off-by: Tony Luck diff --git a/arch/ia64/include/asm/mca.h b/arch/ia64/include/asm/mca.h index 43f96ab..8c70961 100644 --- a/arch/ia64/include/asm/mca.h +++ b/arch/ia64/include/asm/mca.h @@ -143,6 +143,7 @@ extern unsigned long __per_cpu_mca[NR_CPUS]; extern int cpe_vector; extern int ia64_cpe_irq; extern void ia64_mca_init(void); +extern void ia64_mca_irq_init(void); extern void ia64_mca_cpu_init(void *); extern void ia64_os_mca_dispatch(void); extern void ia64_os_mca_dispatch_end(void); diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index ad69606..f2c41828 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -23,6 +23,8 @@ #include #include +#include + /* * 'what should we do if we get a hw irq event on an illegal vector'. * each architecture has to answer this themselves. @@ -83,6 +85,12 @@ bool is_affinity_mask_valid(const struct cpumask *cpumask) #endif /* CONFIG_SMP */ +int __init arch_early_irq_init(void) +{ + ia64_mca_irq_init(); + return 0; +} + #ifdef CONFIG_HOTPLUG_CPU unsigned int vectors_in_migration[NR_IRQS]; diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 65bf9cd..d7396db 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -2074,22 +2074,16 @@ ia64_mca_init(void) printk(KERN_INFO "MCA related initialization done\n"); } + /* - * ia64_mca_late_init - * - * Opportunity to setup things that require initialization later - * than ia64_mca_init. Setup a timer to poll for CPEs if the - * platform doesn't support an interrupt driven mechanism. - * - * Inputs : None - * Outputs : Status + * These pieces cannot be done in ia64_mca_init() because it is called before + * early_irq_init() which would wipe out our percpu irq registrations. But we + * cannot leave them until ia64_mca_late_init() because by then all the other + * processors have been brought online and have set their own CMC vectors to + * point at a non-existant action. Called from arch_early_irq_init(). */ -static int __init -ia64_mca_late_init(void) +void __init ia64_mca_irq_init(void) { - if (!mca_init) - return 0; - /* * Configure the CMCI/P vector and handler. Interrupts for CMC are * per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c). @@ -2108,6 +2102,23 @@ ia64_mca_late_init(void) /* Setup the CPEI/P handler */ register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction); #endif +} + +/* + * ia64_mca_late_init + * + * Opportunity to setup things that require initialization later + * than ia64_mca_init. Setup a timer to poll for CPEs if the + * platform doesn't support an interrupt driven mechanism. + * + * Inputs : None + * Outputs : Status + */ +static int __init +ia64_mca_late_init(void) +{ + if (!mca_init) + return 0; register_hotcpu_notifier(&mca_cpu_notifier); -- cgit v0.10.2 From eee46b3de715c9a2a25ebf3c135b88a1d73d92c2 Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Thu, 21 Mar 2013 11:50:30 +0800 Subject: Fix build error for numa_clear_node() under IA64 numa_clear_node() function is not implemented under IA64, it will be called in unmap_cpu_on_node() in mm/memory_hotplug.c. This cause build error under IA64, this patch adds numa_clear_node() in IA64 to fix this problem. [Added __cpuinit notation to numa_clear_node() to keep linker happy -Tony] Signed-off-by: Yijing Wang Signed-off-by: Tony Luck diff --git a/arch/ia64/include/asm/numa.h b/arch/ia64/include/asm/numa.h index 2e27ef1..2db0a6c 100644 --- a/arch/ia64/include/asm/numa.h +++ b/arch/ia64/include/asm/numa.h @@ -67,14 +67,13 @@ extern int paddr_to_nid(unsigned long paddr); extern void map_cpu_to_node(int cpu, int nid); extern void unmap_cpu_from_node(int cpu, int nid); - +extern void numa_clear_node(int cpu); #else /* !CONFIG_NUMA */ #define map_cpu_to_node(cpu, nid) do{}while(0) #define unmap_cpu_from_node(cpu, nid) do{}while(0) - #define paddr_to_nid(addr) 0 - +#define numa_clear_node(cpu) do { } while (0) #endif /* CONFIG_NUMA */ #endif /* _ASM_IA64_NUMA_H */ diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index 3efea7d..def782e 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -73,6 +73,11 @@ int __meminit __early_pfn_to_nid(unsigned long pfn) return -1; } +void __cpuinit numa_clear_node(int cpu) +{ + unmap_cpu_from_node(cpu, NUMA_NO_NODE); +} + #ifdef CONFIG_MEMORY_HOTPLUG /* * SRAT information is stored in node_memblk[], then we can use SRAT -- cgit v0.10.2 From 83d435dd6e24b17955f91480f633bb39f0a7c2b4 Mon Sep 17 00:00:00 2001 From: Xie XiuQi Date: Fri, 29 Mar 2013 09:43:47 +0800 Subject: Fix example error_injection_tool I got a "sched_setaffinity:: Invalid argument" error when using err_injection_tool to inject error on a system with over 32 cpus. Error information when injecting an error on a system with over 32 cpus: $ ./err_injection_tool -i /sys/devices/system/cpu/cpu0/err_inject//err_type_info Begine at Tue Mar 26 11:20:08 2013 Configurations: On cpu32: loop=10, interval=5(s) err_type_info=4101,err_struct_info=95 Error sched_setaffinity:: Invalid argument All done This because there is overflow when calculating the cpumask: the type of (1< 31, (1< Signed-off-by: Tony Luck diff --git a/Documentation/ia64/err_inject.txt b/Documentation/ia64/err_inject.txt index 223e4f0..9f651c1 100644 --- a/Documentation/ia64/err_inject.txt +++ b/Documentation/ia64/err_inject.txt @@ -882,7 +882,7 @@ int err_inj() cpu=parameters[i].cpu; k = cpu%64; j = cpu/64; - mask[j]=1< Date: Mon, 25 Mar 2013 19:14:16 +0800 Subject: Add size restriction to the kdump documentation In efi_init() memory aligns in IA64_GRANULE_SIZE(16M). If set "crashkernel=1024M-:600M" and use sparse memory model, when crash kernel booting it changes [128M-728M] to [128M-720M]. But initrd memory is in [709M-727M], and virt_addr_valid() *can not* check the invalid pages when freeing initrd memory, because there are some pages missed at the end of the section, and this causes error. ... Unpacking initramfs... Freeing initrd memory: 19648kB freed BUG: Bad page state in process swapper pfn:02d00 page:e0000000102dd800 flags:(null) count:0 mapcount:1 mapping:(null) index:0 Call Trace: [] show_stack+0x80/0xa0 sp=e000000021e8fbd0 bsp=e000000021e81360 [] dump_stack+0x30/0x50 sp=e000000021e8fda0 bsp=e000000021e81348 [] bad_page+0x280/0x380 sp=e000000021e8fda0 bsp=e000000021e81308 [] free_hot_cold_page+0x3a0/0x5c0 sp=e000000021e8fda0 bsp=e000000021e812a0 [] free_hot_page+0x30/0x60 sp=e000000021e8fda0 bsp=e000000021e81280 [] __free_pages+0xb0/0xe0 sp=e000000021e8fda0 bsp=e000000021e81258 [] free_pages+0xa0/0xc0 sp=e000000021e8fda0 bsp=e000000021e81230 [] free_initrd_mem+0x230/0x290 sp=e000000021e8fda0 bsp=e000000021e811d8 [] populate_rootfs+0x1c0/0x280 sp=e000000021e8fdb0 bsp=e000000021e811a0 [] do_one_initcall+0x3b0/0x3e0 sp=e000000021e8fdb0 bsp=e000000021e81158 [] kernel_init+0x3f0/0x4b0 sp=e000000021e8fdb0 bsp=e000000021e81108 [] kernel_thread_helper+0xd0/0x100 sp=e000000021e8fe30 bsp=e000000021e810e0 [] start_kernel_thread+0x20/0x40 sp=e000000021e8fe30 bsp=e000000021e810e0 ... In "http://marc.info/?l=linux-arch&m=136147092429314&w=2" Tony said: "Perhaps in Documentation/kdump/kdump.txt (which the crashkernel entry in kernel-parameters.txt points at). The ia64 section of kdump.txt notes that the start address will be rounded up to a GRANULE boundary, but doesn't talk about restrictions on the size." This patch add size restriction to the documentation of kdump. Signed-off-by: Xishi Qiu Signed-off-by: Tony Luck diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt index 13f1aa0..9c7fd988 100644 --- a/Documentation/kdump/kdump.txt +++ b/Documentation/kdump/kdump.txt @@ -297,6 +297,7 @@ Boot into System Kernel On ia64, 256M@256M is a generous value that typically works. The region may be automatically placed on ia64, see the dump-capture kernel config option notes above. + If use sparse memory, the size should be rounded to GRANULE boundaries. On s390x, typically use "crashkernel=xxM". The value of xx is dependent on the memory consumption of the kdump system. In general this is not -- cgit v0.10.2