From 34f2c0ac111aaf090c7d2432dab532640870733a Mon Sep 17 00:00:00 2001
From: Paul Gortmaker
Date: Sun, 1 Apr 2012 16:38:46 -0400
Subject: tile: fix multiple build failures from system.h dismantle

Commit bd119c69239322caafdb64517a806037d0d0c70a "Disintegrate
asm/system.h for Tile" created the asm/switch_to.h file, but did not
add an include of it to all its users.

Also, commit b4816afa3986704d1404fc48e931da5135820472 "Move the
asm-generic/system.h xchg() implementation to asm-generic/cmpxchg.h"
introduced the concept of asm/cmpxchg.h but the tile arch never got
one.  Fork the cmpxchg content out of the asm/atomic.h file to create
one.

Acked-by: David Howells
Signed-off-by: Paul Gortmaker
Signed-off-by: Chris Metcalf

diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h
index bb696da..f246142 100644
--- a/arch/tile/include/asm/atomic.h
+++ b/arch/tile/include/asm/atomic.h
@@ -17,6 +17,8 @@
 #ifndef _ASM_TILE_ATOMIC_H
 #define _ASM_TILE_ATOMIC_H

+#include <asm/cmpxchg.h>
+
 #ifndef __ASSEMBLY__

 #include
@@ -121,54 +123,6 @@ static inline int atomic_read(const atomic_t *v)
  */
 #define atomic_add_negative(i, v)	(atomic_add_return((i), (v)) < 0)

-/* Nonexistent functions intended to cause link errors. */
-extern unsigned long __xchg_called_with_bad_pointer(void);
-extern unsigned long __cmpxchg_called_with_bad_pointer(void);
-
-#define xchg(ptr, x)						\
-	({							\
-		typeof(*(ptr)) __x;				\
-		switch (sizeof(*(ptr))) {			\
-		case 4:						\
-			__x = (typeof(__x))(typeof(__x-__x))atomic_xchg( \
-				(atomic_t *)(ptr),		\
-				(u32)(typeof((x)-(x)))(x));	\
-			break;					\
-		case 8:						\
-			__x = (typeof(__x))(typeof(__x-__x))atomic64_xchg( \
-				(atomic64_t *)(ptr),		\
-				(u64)(typeof((x)-(x)))(x));	\
-			break;					\
-		default:					\
-			__xchg_called_with_bad_pointer();	\
-		}						\
-		__x;						\
-	})
-
-#define cmpxchg(ptr, o, n)					\
-	({							\
-		typeof(*(ptr)) __x;				\
-		switch (sizeof(*(ptr))) {			\
-		case 4:						\
-			__x = (typeof(__x))(typeof(__x-__x))atomic_cmpxchg( \
-				(atomic_t *)(ptr),		\
-				(u32)(typeof((o)-(o)))(o),	\
-				(u32)(typeof((n)-(n)))(n));	\
-			break;					\
-		case 8:						\
-			__x = (typeof(__x))(typeof(__x-__x))atomic64_cmpxchg( \
-				(atomic64_t *)(ptr),		\
-				(u64)(typeof((o)-(o)))(o),	\
-				(u64)(typeof((n)-(n)))(n));	\
-			break;					\
-		default:					\
-			__cmpxchg_called_with_bad_pointer();	\
-		}						\
-		__x;						\
-	})
-
-#define tas(ptr) (xchg((ptr), 1))
-
 #endif /* __ASSEMBLY__ */

 #ifndef __tilegx__
diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h
new file mode 100644
index 0000000..276f067
--- /dev/null
+++ b/arch/tile/include/asm/cmpxchg.h
@@ -0,0 +1,73 @@
+/*
+ * cmpxchg.h -- forked from asm/atomic.h with this copyright:
+ *
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _ASM_TILE_CMPXCHG_H
+#define _ASM_TILE_CMPXCHG_H
+
+#ifndef __ASSEMBLY__
+
+/* Nonexistent functions intended to cause link errors. */
+extern unsigned long __xchg_called_with_bad_pointer(void);
+extern unsigned long __cmpxchg_called_with_bad_pointer(void);
+
+#define xchg(ptr, x)						\
+	({							\
+		typeof(*(ptr)) __x;				\
+		switch (sizeof(*(ptr))) {			\
+		case 4:						\
+			__x = (typeof(__x))(typeof(__x-__x))atomic_xchg( \
+				(atomic_t *)(ptr),		\
+				(u32)(typeof((x)-(x)))(x));	\
+			break;					\
+		case 8:						\
+			__x = (typeof(__x))(typeof(__x-__x))atomic64_xchg( \
+				(atomic64_t *)(ptr),		\
+				(u64)(typeof((x)-(x)))(x));	\
+			break;					\
+		default:					\
+			__xchg_called_with_bad_pointer();	\
+		}						\
+		__x;						\
+	})
+
+#define cmpxchg(ptr, o, n)					\
+	({							\
+		typeof(*(ptr)) __x;				\
+		switch (sizeof(*(ptr))) {			\
+		case 4:						\
+			__x = (typeof(__x))(typeof(__x-__x))atomic_cmpxchg( \
+				(atomic_t *)(ptr),		\
+				(u32)(typeof((o)-(o)))(o),	\
+				(u32)(typeof((n)-(n)))(n));	\
+			break;					\
+		case 8:						\
+			__x = (typeof(__x))(typeof(__x-__x))atomic64_cmpxchg( \
+				(atomic64_t *)(ptr),		\
+				(u64)(typeof((o)-(o)))(o),	\
+				(u64)(typeof((n)-(n)))(n));	\
+			break;					\
+		default:					\
+			__cmpxchg_called_with_bad_pointer();	\
+		}						\
+		__x;						\
+	})
+
+#define tas(ptr) (xchg((ptr), 1))
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_TILE_CMPXCHG_H */
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 30caeca..ee01b1c 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include <asm/switch_to.h>
 #include
 #include
 #include
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
index 37ee4d0..0be6b01 100644
--- a/arch/tile/kernel/stack.c
+++ b/arch/tile/kernel/stack.c
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include <asm/switch_to.h>
 #include
 #include
 #include
-- 
cgit v0.10.2
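For background on the macros the new header carries over: generic kernel
code depends on xchg()/cmpxchg() working on any naturally aligned 32-bit
or 64-bit object, which is exactly what the sizeof() dispatch above
provides. A minimal illustrative sketch of the usual cmpxchg() retry
loop (the counter and function names here are hypothetical, not from
the patch):

#include <linux/atomic.h>

static unsigned int seq;

/* Atomically bump a counter by 2, retrying if another cpu raced us. */
static unsigned int bump_seq(void)
{
	unsigned int old, new;

	do {
		old = seq;
		new = old + 2;
	} while (cmpxchg(&seq, old, new) != old);

	return new;
}

If a caller applied cmpxchg() to an object of an unsupported size, the
reference to __cmpxchg_called_with_bad_pointer() would survive to link
time and fail the build, which is the point of the deliberately
undefined functions.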
From 8d6951439ef524683057251f1231df232046b6b6 Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Tue, 27 Mar 2012 13:47:57 -0400
Subject: arch/tile/Kconfig: remove pointless "!M386" test.

Looks like a cut and paste bug from the x86 version.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 11270ca..30413c1 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -12,7 +12,7 @@ config TILE
 	select GENERIC_PENDING_IRQ if SMP
 	select GENERIC_IRQ_SHOW
 	select SYS_HYPERVISOR
-	select ARCH_HAVE_NMI_SAFE_CMPXCHG if !M386
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG

 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
-- 
cgit v0.10.2

From 3d1e8a81cc81e62d13731e48c40760e60f31b0d5 Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Tue, 27 Mar 2012 13:53:30 -0400
Subject: arch/tile/Kconfig: rename tile_defconfig to tilepro_defconfig

We switched to using "tilepro" for the 32-bit stuff a while ago,
but missed this one usage.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 30413c1..30393aa 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -69,6 +69,9 @@ config ARCH_PHYS_ADDR_T_64BIT
 config ARCH_DMA_ADDR_T_64BIT
 	def_bool y

+config NEED_DMA_MAP_STATE
+	def_bool y
+
 config LOCKDEP_SUPPORT
 	def_bool y

@@ -118,7 +121,7 @@ config 64BIT

 config ARCH_DEFCONFIG
 	string
-	default "arch/tile/configs/tile_defconfig" if !TILEGX
+	default "arch/tile/configs/tilepro_defconfig" if !TILEGX
 	default "arch/tile/configs/tilegx_defconfig" if TILEGX

 source "init/Kconfig"
-- 
cgit v0.10.2

From 884197f7ea93dc2faf399060dc29cb0dbbda6937 Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Tue, 27 Mar 2012 13:56:04 -0400
Subject: arch/tile/Kconfig: don't specify CONFIG_PAGE_OFFSET for 64-bit builds

It's fixed at half the VA space and there's no point in configuring it.
Signed-off-by: Chris Metcalf

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 30393aa..96033e2 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -243,6 +243,7 @@ endchoice

 config PAGE_OFFSET
 	hex
+	depends on !64BIT
 	default 0xF0000000 if VMSPLIT_3_75G
 	default 0xE0000000 if VMSPLIT_3_5G
 	default 0xB0000000 if VMSPLIT_2_75G
-- 
cgit v0.10.2

From 327e8b6b25588ab906856a4e506b28c59422f11b Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Tue, 27 Mar 2012 14:04:57 -0400
Subject: arch/tile: fix typo in <arch/spr_def.h>

We aren't yet using this definition in the kernel, but fix it up
before someone goes looking for it.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/include/arch/spr_def.h b/arch/tile/include/arch/spr_def.h
index f548efe..d6ba449 100644
--- a/arch/tile/include/arch/spr_def.h
+++ b/arch/tile/include/arch/spr_def.h
@@ -60,8 +60,8 @@
 	_concat4(SPR_IPI_EVENT_, CONFIG_KERNEL_PL,,)
 #define SPR_IPI_EVENT_RESET_K \
 	_concat4(SPR_IPI_EVENT_RESET_, CONFIG_KERNEL_PL,,)
-#define SPR_IPI_MASK_SET_K \
-	_concat4(SPR_IPI_MASK_SET_, CONFIG_KERNEL_PL,,)
+#define SPR_IPI_EVENT_SET_K \
+	_concat4(SPR_IPI_EVENT_SET_, CONFIG_KERNEL_PL,,)
 #define INT_IPI_K \
 	_concat4(INT_IPI_, CONFIG_KERNEL_PL,,)
-- 
cgit v0.10.2

From 07feea877d18453bbe4ad47fe2a365eebf56a7af Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Tue, 27 Mar 2012 14:10:03 -0400
Subject: arch/tile: revert comment for atomic64_add_unless().

It still returns whether @v was not @u, not the old value, unlike
__atomic_add_unless().

Signed-off-by: Chris Metcalf
Acked-by: Arun Sharma

diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index 466dc4a..54d1da8 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -200,7 +200,7 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v)
  * @u: ...unless v is equal to u.
  *
  * Atomically adds @a to @v, so long as @v was not already @u.
- * Returns the old value of @v.
+ * Returns non-zero if @v was not @u, and zero otherwise.
  */
 static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u)
 {
-- 
cgit v0.10.2
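The distinction the reverted comment preserves matters to callers:
atomic64_add_unless() returns a truth value, not the old value. A small
hypothetical sketch of typical usage (not from the patch):

static atomic64_t refcnt = ATOMIC64_INIT(1);

/* Take a reference only if the object is still live (count != 0). */
static int get_ref_unless_zero(void)
{
	/* Non-zero return means the add happened, i.e. refcnt was not 0. */
	return atomic64_add_unless(&refcnt, 1, 0);
}

Code written against the incorrect "returns the old value" wording would
misread that result.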
From 664c100bce0070148131f8d49518015476c03cae Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Tue, 27 Mar 2012 14:17:05 -0400
Subject: arch/tile: fix gcc 4.6 warnings in <asm/bitops_64.h>

Fix some signedness and variable usage warnings in change_bit() and
test_and_change_bit().

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/include/asm/bitops_64.h b/arch/tile/include/asm/bitops_64.h
index 58d021a..60b87ee 100644
--- a/arch/tile/include/asm/bitops_64.h
+++ b/arch/tile/include/asm/bitops_64.h
@@ -38,10 +38,10 @@ static inline void clear_bit(unsigned nr, volatile unsigned long *addr)

 static inline void change_bit(unsigned nr, volatile unsigned long *addr)
 {
-	unsigned long old, mask = (1UL << (nr % BITS_PER_LONG));
-	long guess, oldval;
+	unsigned long mask = (1UL << (nr % BITS_PER_LONG));
+	unsigned long guess, oldval;
 	addr += nr / BITS_PER_LONG;
-	old = *addr;
+	oldval = *addr;
 	do {
 		guess = oldval;
 		oldval = atomic64_cmpxchg((atomic64_t *)addr,
@@ -85,7 +85,7 @@ static inline int test_and_change_bit(unsigned nr,
 				      volatile unsigned long *addr)
 {
 	unsigned long mask = (1UL << (nr % BITS_PER_LONG));
-	long guess, oldval = *addr;
+	unsigned long guess, oldval;
 	addr += nr / BITS_PER_LONG;
 	oldval = *addr;
 	do {
-- 
cgit v0.10.2

From cbe224705e573cead57c4d4bed0c91efb564a344 Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Tue, 27 Mar 2012 15:21:00 -0400
Subject: arch/tile: use 0 for IRQ_RESCHEDULE instead of 1

This avoids assigning IRQ 0 to PCI devices, because we've seen that
doesn't always work well.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/include/asm/irq.h b/arch/tile/include/asm/irq.h
index f80f8ce..33cff9a 100644
--- a/arch/tile/include/asm/irq.h
+++ b/arch/tile/include/asm/irq.h
@@ -21,7 +21,7 @@
 #define NR_IRQS 32

 /* IRQ numbers used for linux IPIs. */
-#define IRQ_RESCHEDULE 1
+#define IRQ_RESCHEDULE 0

 #define irq_canonicalize(irq)   (irq)
-- 
cgit v0.10.2

From b287f69676a34a9fc341de4d79a9c74e1959dec6 Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 14:02:52 -0400
Subject: arch/tile: avoid false corrupt frame warning in early boot

With lockstat we can end up trying to get a backtrace before
"high_memory" is initialized, so don't worry about range testing if it
is zero.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index ee01b1c..2d5ef61 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -286,7 +286,7 @@ struct task_struct *validate_current(void)
 	static struct task_struct corrupt = { .comm = "<corrupt>" };
 	struct task_struct *tsk = current;
 	if (unlikely((unsigned long)tsk < PAGE_OFFSET ||
-		     (void *)tsk > high_memory ||
+		     (high_memory && (void *)tsk > high_memory) ||
 		     ((unsigned long)tsk & (__alignof__(*tsk) - 1)) != 0)) {
 		pr_err("Corrupt 'current' %p (sp %#lx)\n", tsk, stack_pointer);
 		tsk = &corrupt;
-- 
cgit v0.10.2

From efb734d8ed040b053f53fd53589ed5d9c9b5cd04 Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 14:05:04 -0400
Subject: arch/tile: make sure to build memcpy_user_64 without frame pointer

Add a comment explaining why this is important, and add a
CFLAGS_REMOVE clause to the Makefile to make sure it happens.
Signed-off-by: Chris Metcalf

diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
index 0c26086..985f598 100644
--- a/arch/tile/lib/Makefile
+++ b/arch/tile/lib/Makefile
@@ -7,6 +7,7 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \
 	strchr_$(BITS).o strlen_$(BITS).o

 ifeq ($(CONFIG_TILEGX),y)
+CFLAGS_REMOVE_memcpy_user_64.o = -fno-omit-frame-pointer
 lib-y += memcpy_user_64.o
 else
 lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o
diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c
index 4763b3a..37440ca 100644
--- a/arch/tile/lib/memcpy_user_64.c
+++ b/arch/tile/lib/memcpy_user_64.c
@@ -14,7 +14,13 @@
 * Do memcpy(), but trap and return "n" when a load or store faults.
 *
 * Note: this idiom only works when memcpy() compiles to a leaf function.
- * If "sp" is updated during memcpy, the "jrp lr" will be incorrect.
+ * Here "leaf function" not only means it makes no calls, but also
+ * requires no stack operations (sp, stack frame pointer) and no
+ * use of callee-saved registers, else "jrp lr" will be incorrect since
+ * unwinding the stack frame is bypassed. Since memcpy() is not complex,
+ * these conditions are satisfied here, but we need to be careful when
+ * modifying this file. This is not a clean solution but is the best
+ * one so far.
 *
 * Also note that we are capturing "n" from the containing scope here.
 */
-- 
cgit v0.10.2

From 5f639fdcd8c186c8128c616e94a7e7b159c968ae Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 14:06:14 -0400
Subject: arch/tile: various bugs in stack backtracer

Fix a long-standing bug in the stack backtracer where we would print
garbage to the console instead of kernel function names, if the kernel
wasn't built with symbol support (e.g. mboot).

Make sure to tag every line of userspace backtrace output if we
actually have the mmap_sem, since that way if there's no tag, we know
that it's because we couldn't trylock the semaphore.

Stop doing a TLB flush and examining page tables during backtrace.
Instead, just trust that __copy_from_user_inatomic() will properly
fault and return a failure, which it should do in all cases.

Fix a latent bug where the backtracer would directly examine a signal
context in user space, rather than copying it safely to kernel memory
first. This meant that a race with another thread could potentially
have caused a kernel panic.

Guard against unaligned sp when trying to restart backtrace at an
interrupt or signal handler point in the kernel backtracer.

Report kernel symbolic information for the call instruction rather
than for the following instruction. We still report the actual numeric
address corresponding to the instruction after the call, for the sake
of consistency with the normal expectations for stack backtracers.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h
index 4d97a2d..0e9d382 100644
--- a/arch/tile/include/asm/stack.h
+++ b/arch/tile/include/asm/stack.h
@@ -25,7 +25,6 @@
 struct KBacktraceIterator {
 	BacktraceIterator it;
 	struct task_struct *task;     /* task we are backtracing */
-	pte_t *pgtable;               /* page table for user space access */
 	int end;                      /* iteration complete. */
 	int new_context;              /* new context is starting */
 	int profile;                  /* profiling, so stop on async intrpt */
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
index 0be6b01..b2f44c2 100644
--- a/arch/tile/kernel/stack.c
+++ b/arch/tile/kernel/stack.c
@@ -21,9 +21,10 @@
 #include
 #include
 #include
+#include <linux/dcache.h>
+#include <linux/fs.h>
 #include
 #include
-#include <asm/tlbflush.h>
 #include
 #include
 #include
@@ -45,72 +46,23 @@ static int in_kernel_stack(struct KBacktraceIterator *kbt, unsigned long sp)
 	return sp >= kstack_base && sp < kstack_base + THREAD_SIZE;
 }

-/* Is address valid for reading? */
-static int valid_address(struct KBacktraceIterator *kbt, unsigned long address)
-{
-	HV_PTE *l1_pgtable = kbt->pgtable;
-	HV_PTE *l2_pgtable;
-	unsigned long pfn;
-	HV_PTE pte;
-	struct page *page;
-
-	if (l1_pgtable == NULL)
-		return 0;	/* can't read user space in other tasks */
-
-#ifdef CONFIG_64BIT
-	/* Find the real l1_pgtable by looking in the l0_pgtable. */
-	pte = l1_pgtable[HV_L0_INDEX(address)];
-	if (!hv_pte_get_present(pte))
-		return 0;
-	pfn = hv_pte_get_pfn(pte);
-	if (pte_huge(pte)) {
-		if (!pfn_valid(pfn)) {
-			pr_err("L0 huge page has bad pfn %#lx\n", pfn);
-			return 0;
-		}
-		return hv_pte_get_present(pte) && hv_pte_get_readable(pte);
-	}
-	page = pfn_to_page(pfn);
-	BUG_ON(PageHighMem(page));	/* No HIGHMEM on 64-bit. */
-	l1_pgtable = (HV_PTE *)pfn_to_kaddr(pfn);
-#endif
-	pte = l1_pgtable[HV_L1_INDEX(address)];
-	if (!hv_pte_get_present(pte))
-		return 0;
-	pfn = hv_pte_get_pfn(pte);
-	if (pte_huge(pte)) {
-		if (!pfn_valid(pfn)) {
-			pr_err("huge page has bad pfn %#lx\n", pfn);
-			return 0;
-		}
-		return hv_pte_get_present(pte) && hv_pte_get_readable(pte);
-	}
-
-	page = pfn_to_page(pfn);
-	if (PageHighMem(page)) {
-		pr_err("L2 page table not in LOWMEM (%#llx)\n",
-		       HV_PFN_TO_CPA(pfn));
-		return 0;
-	}
-	l2_pgtable = (HV_PTE *)pfn_to_kaddr(pfn);
-	pte = l2_pgtable[HV_L2_INDEX(address)];
-	return hv_pte_get_present(pte) && hv_pte_get_readable(pte);
-}
-
 /* Callback for backtracer; basically a glorified memcpy */
 static bool read_memory_func(void *result, unsigned long address,
 			     unsigned int size, void *vkbt)
 {
 	int retval;
 	struct KBacktraceIterator *kbt = (struct KBacktraceIterator *)vkbt;
+
+	if (address == 0)
+		return 0;
 	if (__kernel_text_address(address)) {
 		/* OK to read kernel code. */
 	} else if (address >= PAGE_OFFSET) {
 		/* We only tolerate kernel-space reads of this task's stack */
 		if (!in_kernel_stack(kbt, address))
 			return 0;
-	} else if (!valid_address(kbt, address)) {
-		return 0;	/* invalid user-space address */
+	} else if (!kbt->is_current) {
+		return 0;	/* can't read from other user address spaces */
 	}
 	pagefault_disable();
 	retval = __copy_from_user_inatomic(result,
@@ -128,6 +80,8 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
 	unsigned long sp = kbt->it.sp;
 	struct pt_regs *p;

+	if (sp % sizeof(long) != 0)
+		return NULL;
 	if (!in_kernel_stack(kbt, sp))
 		return NULL;
 	if (!in_kernel_stack(kbt, sp + C_ABI_SAVE_AREA_SIZE + PTREGS_SIZE-1))
@@ -170,27 +124,27 @@ static int is_sigreturn(unsigned long pc)
 }

 /* Return a pt_regs pointer for a valid signal handler frame */
-static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt)
+static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt,
+				      struct rt_sigframe* kframe)
 {
 	BacktraceIterator *b = &kbt->it;

-	if (b->pc == VDSO_BASE) {
-		struct rt_sigframe *frame;
-		unsigned long sigframe_top =
-			b->sp + sizeof(struct rt_sigframe) - 1;
-		if (!valid_address(kbt, b->sp) ||
-		    !valid_address(kbt, sigframe_top)) {
-			if (kbt->verbose)
-				pr_err("  (odd signal: sp %#lx?)\n",
-				       (unsigned long)(b->sp));
+	if (b->pc == VDSO_BASE && b->sp < PAGE_OFFSET &&
+	    b->sp % sizeof(long) == 0) {
+		int retval;
+		pagefault_disable();
+		retval = __copy_from_user_inatomic(
+			kframe, (void __user __force *)b->sp,
+			sizeof(*kframe));
+		pagefault_enable();
+		if (retval != 0 ||
+		    (unsigned int)(kframe->info.si_signo) >= _NSIG)
 			return NULL;
-		}
-		frame = (struct rt_sigframe *)b->sp;
 		if (kbt->verbose) {
 			pr_err("  <received signal %d>\n",
-			       frame->info.si_signo);
+			       kframe->info.si_signo);
 		}
-		return (struct pt_regs *)&frame->uc.uc_mcontext;
+		return (struct pt_regs *)&kframe->uc.uc_mcontext;
 	}
 	return NULL;
 }
@@ -203,10 +157,11 @@ static int KBacktraceIterator_is_sigreturn(struct KBacktraceIterator *kbt)
 static int KBacktraceIterator_restart(struct KBacktraceIterator *kbt)
 {
 	struct pt_regs *p;
+	struct rt_sigframe kframe;

 	p = valid_fault_handler(kbt);
 	if (p == NULL)
-		p = valid_sigframe(kbt);
+		p = valid_sigframe(kbt, &kframe);
 	if (p == NULL)
 		return 0;
 	backtrace_init(&kbt->it, read_memory_func, kbt,
@@ -266,41 +221,19 @@ void KBacktraceIterator_init(struct KBacktraceIterator *kbt,

 	/*
 	 * Set up callback information.  We grab the kernel stack base
-	 * so we will allow reads of that address range, and if we're
-	 * asking about the current process we grab the page table
-	 * so we can check user accesses before trying to read them.
-	 * We flush the TLB to avoid any weird skew issues.
+	 * so we will allow reads of that address range.
 	 */
-	is_current = (t == NULL);
+	is_current = (t == NULL || t == current);
 	kbt->is_current = is_current;
 	if (is_current)
 		t = validate_current();
 	kbt->task = t;
-	kbt->pgtable = NULL;
 	kbt->verbose = 0;	/* override in caller if desired */
 	kbt->profile = 0;	/* override in caller if desired */
 	kbt->end = KBT_ONGOING;
-	kbt->new_context = 0;
-	if (is_current) {
-		HV_PhysAddr pgdir_pa = hv_inquire_context().page_table;
-		if (pgdir_pa == (unsigned long)swapper_pg_dir - PAGE_OFFSET) {
-			/*
-			 * Not just an optimization: this also allows
-			 * this to work at all before va/pa mappings
-			 * are set up.
- */ - kbt->pgtable = swapper_pg_dir; - } else { - struct page *page = pfn_to_page(PFN_DOWN(pgdir_pa)); - if (!PageHighMem(page)) - kbt->pgtable = __va(pgdir_pa); - else - pr_err("page table not in LOWMEM" - " (%#llx)\n", pgdir_pa); - } - local_flush_tlb_all(); + kbt->new_context = 1; + if (is_current) validate_stack(regs); - } if (regs == NULL) { if (is_current || t->state == TASK_RUNNING) { @@ -346,6 +279,78 @@ void KBacktraceIterator_next(struct KBacktraceIterator *kbt) } EXPORT_SYMBOL(KBacktraceIterator_next); +static void describe_addr(struct KBacktraceIterator *kbt, + unsigned long address, + int have_mmap_sem, char *buf, size_t bufsize) +{ + struct vm_area_struct *vma; + size_t namelen, remaining; + unsigned long size, offset, adjust; + char *p, *modname; + const char *name; + int rc; + + /* + * Look one byte back for every caller frame (i.e. those that + * aren't a new context) so we look up symbol data for the + * call itself, not the following instruction, which may be on + * a different line (or in a different function). + */ + adjust = !kbt->new_context; + address -= adjust; + + if (address >= PAGE_OFFSET) { + /* Handle kernel symbols. */ + BUG_ON(bufsize < KSYM_NAME_LEN); + name = kallsyms_lookup(address, &size, &offset, + &modname, buf); + if (name == NULL) { + buf[0] = '\0'; + return; + } + namelen = strlen(buf); + remaining = (bufsize - 1) - namelen; + p = buf + namelen; + rc = snprintf(p, remaining, "+%#lx/%#lx ", + offset + adjust, size); + if (modname && rc < remaining) + snprintf(p + rc, remaining - rc, "[%s] ", modname); + buf[bufsize-1] = '\0'; + return; + } + + /* If we don't have the mmap_sem, we can't show any more info. */ + buf[0] = '\0'; + if (!have_mmap_sem) + return; + + /* Find vma info. */ + vma = find_vma(kbt->task->mm, address); + if (vma == NULL || address < vma->vm_start) { + snprintf(buf, bufsize, "[unmapped address] "); + return; + } + + if (vma->vm_file) { + char *s; + p = d_path(&vma->vm_file->f_path, buf, bufsize); + if (IS_ERR(p)) + p = "?"; + s = strrchr(p, '/'); + if (s) + p = s+1; + } else { + p = "anon"; + } + + /* Generate a string description of the vma info. */ + namelen = strlen(p); + remaining = (bufsize - 1) - namelen; + memmove(buf, p, namelen); + snprintf(buf + namelen, remaining, "[%lx+%lx] ", + vma->vm_start, vma->vm_end - vma->vm_start); +} + /* * This method wraps the backtracer's more generic support. * It is only invoked from the architecture-specific code; show_stack() @@ -354,6 +359,7 @@ EXPORT_SYMBOL(KBacktraceIterator_next); void tile_show_stack(struct KBacktraceIterator *kbt, int headers) { int i; + int have_mmap_sem = 0; if (headers) { /* @@ -370,31 +376,16 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) kbt->verbose = 1; i = 0; for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) { - char *modname; - const char *name; - unsigned long address = kbt->it.pc; - unsigned long offset, size; char namebuf[KSYM_NAME_LEN+100]; + unsigned long address = kbt->it.pc; - if (address >= PAGE_OFFSET) - name = kallsyms_lookup(address, &size, &offset, - &modname, namebuf); - else - name = NULL; - - if (!name) - namebuf[0] = '\0'; - else { - size_t namelen = strlen(namebuf); - size_t remaining = (sizeof(namebuf) - 1) - namelen; - char *p = namebuf + namelen; - int rc = snprintf(p, remaining, "+%#lx/%#lx ", - offset, size); - if (modname && rc < remaining) - snprintf(p + rc, remaining - rc, - "[%s] ", modname); - namebuf[sizeof(namebuf)-1] = '\0'; - } + /* Try to acquire the mmap_sem as we pass into userspace. 
+		if (address < PAGE_OFFSET && !have_mmap_sem && kbt->task->mm)
+			have_mmap_sem =
+				down_read_trylock(&kbt->task->mm->mmap_sem);
+
+		describe_addr(kbt, address, have_mmap_sem,
+			      namebuf, sizeof(namebuf));

 		pr_err("  frame %d: 0x%lx %s(sp 0x%lx)\n",
 		       i++, address, namebuf, (unsigned long)(kbt->it.sp));
@@ -409,6 +400,8 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
 		pr_err("Stack dump stopped; next frame identical to this one\n");
 	if (headers)
 		pr_err("Stack dump complete\n");
+	if (have_mmap_sem)
+		up_read(&kbt->task->mm->mmap_sem);
 }
 EXPORT_SYMBOL(tile_show_stack);
-- 
cgit v0.10.2
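One detail of the backtracer change worth calling out: user-space
frames are only annotated when mmap_sem can be taken with
down_read_trylock(), since the dump may run from contexts where
blocking on the semaphore could deadlock. A simplified sketch of that
pattern (the helper name is hypothetical, not from the patch):

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/printk.h>

static void describe_user_pc(struct task_struct *tsk, unsigned long pc)
{
	int have_mmap_sem = 0;

	if (tsk->mm)
		have_mmap_sem = down_read_trylock(&tsk->mm->mmap_sem);
	if (have_mmap_sem) {
		struct vm_area_struct *vma = find_vma(tsk->mm, pc);
		if (vma)
			pr_err("  in vma %#lx-%#lx\n",
			       vma->vm_start, vma->vm_end);
		up_read(&tsk->mm->mmap_sem);
	}
	/* With no semaphore, the frame is printed untagged, by design. */
}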
From e17235382dbb05f70146e141e4b780fd069050dc Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 14:52:00 -0400
Subject: arch/tile: work around a hardware issue with the return-address stack

In certain circumstances we need to do a bunch of jump-and-link
instructions to fill the hardware return-address stack with nonzero
values.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h
index 5f20f92..e28c3df4 100644
--- a/arch/tile/include/asm/traps.h
+++ b/arch/tile/include/asm/traps.h
@@ -64,7 +64,11 @@ void do_breakpoint(struct pt_regs *, int fault_num);

 #ifdef __tilegx__
+/* kernel/single_step.c */
 void gx_singlestep_handle(struct pt_regs *, int fault_num);
+
+/* kernel/intvec_64.S */
+void fill_ra_stack(void);
 #endif

-#endif /* _ASM_TILE_SYSCALLS_H */
+#endif /* _ASM_TILE_TRAPS_H */
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 79c93e1..2c181c8 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -1156,6 +1156,18 @@ int_unalign:
 	push_extra_callee_saves r0
 	j       do_trap

+/* Fill the return address stack with nonzero entries. */
+STD_ENTRY(fill_ra_stack)
+	{
+	 move	r0, lr
+	 jal	1f
+	}
+1:	jal	2f
+2:	jal	3f
+3:	jal	4f
+4:	jrp	r0
+	STD_ENDPROC(fill_ra_stack)
+
 /* Include .intrpt1 array of interrupt vectors */
 	.section ".intrpt1", "ax"
diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
index 2bb6602..32acfd9 100644
--- a/arch/tile/kernel/traps.c
+++ b/arch/tile/kernel/traps.c
@@ -289,7 +289,10 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 		address = regs->pc;
 		break;
 #ifdef __tilegx__
-	case INT_ILL_TRANS:
+	case INT_ILL_TRANS: {
+		/* Avoid a hardware erratum with the return address stack. */
+		fill_ra_stack();
+
 		signo = SIGSEGV;
 		code = SEGV_MAPERR;
 		if (reason & SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK)
@@ -297,6 +300,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 		else
 			address = 0;  /* FIXME: GX: single-step for address */
 		break;
+	}
 #endif
 	default:
 		panic("Unexpected do_trap interrupt number %d", fault_num);
-- 
cgit v0.10.2

From a714ffff36a581756ec3b001f47e8e5e96a9fa0e Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 15:23:54 -0400
Subject: arch/tile: fix up some minor trap handling issues

We now respond to MEM_ERROR traps (e.g. an atomic instruction to
non-cacheable memory) with a SIGBUS.

We also no longer generate a console crash message if a user process
dies due to a SIGTRAP.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 2c181c8..005535d 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -1178,7 +1178,7 @@ STD_ENTRY(fill_ra_stack)
 #define do_hardwall_trap bad_intr
 #endif

-	int_hand     INT_MEM_ERROR, MEM_ERROR, bad_intr
+	int_hand     INT_MEM_ERROR, MEM_ERROR, do_trap
 	int_hand     INT_SINGLE_STEP_3, SINGLE_STEP_3, bad_intr
 #if CONFIG_KERNEL_PL == 2
 	int_hand     INT_SINGLE_STEP_2, SINGLE_STEP_2, gx_singlestep_handle
diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
index 32acfd9..73cff81 100644
--- a/arch/tile/kernel/traps.c
+++ b/arch/tile/kernel/traps.c
@@ -200,7 +200,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 {
 	siginfo_t info = { 0 };
 	int signo, code;
-	unsigned long address;
+	unsigned long address = 0;
 	bundle_bits instr;

 	/* Re-enable interrupts. */
@@ -223,6 +223,10 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 	}

 	switch (fault_num) {
+	case INT_MEM_ERROR:
+		signo = SIGBUS;
+		code = BUS_OBJERR;
+		break;
 	case INT_ILL:
 		if (copy_from_user(&instr, (void __user *)regs->pc,
 				   sizeof(instr))) {
@@ -312,7 +316,8 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 	info.si_addr = (void __user *)address;
 	if (signo == SIGILL)
 		info.si_trapno = fault_num;
-	trace_unhandled_signal("trap", regs, address, signo);
+	if (signo != SIGTRAP)
+		trace_unhandled_signal("trap", regs, address, signo);
 	force_sig_info(signo, &info, current);
 }
-- 
cgit v0.10.2

From 51bcdf8879f7920946c90087e6160680812a44bd Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 15:29:28 -0400
Subject: arch/tile: fix a couple of comments that needed updating

Not associated with any code changes, so I'm just lumping these
comment changes into a commit by themselves.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 5f85d8b..023e2e1 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -913,6 +913,13 @@ void __cpuinit setup_cpu(int boot)

 #ifdef CONFIG_BLK_DEV_INITRD

+/*
+ * Note that the kernel can potentially support other compression
+ * techniques than gz, though we don't do so by default.  If we ever
+ * decide to do so we can either look for other filename extensions,
+ * or just allow a file with this name to be compressed with an
+ * arbitrary compressor (somewhat counterintuitively).
+ */
 static int __initdata set_initramfs_file;
 static char __initdata initramfs_file[128] = "initramfs.cpio.gz";

@@ -928,9 +935,9 @@ static int __init setup_initramfs_file(char *str)
 early_param("initramfs_file", setup_initramfs_file);

 /*
- * We look for an additional "initramfs.cpio.gz" file in the hvfs.
+ * We look for an "initramfs.cpio.gz" file in the hvfs.
 * If there is one, we allocate some memory for it and it will be
- * unpacked to the initramfs after any built-in initramfs_data.
+ * unpacked to the initramfs.
 */
 static void __init load_hv_initrd(void)
 {
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index cba30e9..cac17c4 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -130,7 +130,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 }

 /*
- * Handle a fault on the vmalloc or module mapping area
+ * Handle a fault on the vmalloc area.
 */
 static inline int vmalloc_fault(pgd_t *pgd, unsigned long address)
 {
-- 
cgit v0.10.2

From 6731aa9eae3e94b094a44d2aea02387a39202fbc Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 15:30:19 -0400
Subject: arch/tile/Makefile: use KCFLAGS when figuring out the libgcc path.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/Makefile b/arch/tile/Makefile
index 17acce7..5e4d3b9 100644
--- a/arch/tile/Makefile
+++ b/arch/tile/Makefile
@@ -30,7 +30,8 @@ ifneq ($(CONFIG_DEBUG_EXTRA_FLAGS),"")
 KBUILD_CFLAGS   += $(CONFIG_DEBUG_EXTRA_FLAGS)
 endif

-LIBGCC_PATH     := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
+LIBGCC_PATH     := \
+  $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name)

 # Provide the path to use for "make defconfig".
 KBUILD_DEFCONFIG := $(ARCH)_defconfig
-- 
cgit v0.10.2

From 48292738d06d7b46d861652ef59bd03be931c2c9 Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 15:34:52 -0400
Subject: arch/tile: don't wait for migrating PTEs in an NMI handler

Doing so raises the possibility of self-deadlock if we are waiting
for a backtrace for an oprofile or perf interrupt while we are in
the middle of migrating our own stack page.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index cac17c4..a1da473 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -203,9 +203,14 @@ static pgd_t *get_current_pgd(void)
 * interrupt or a critical region, and must do as little as possible.
 * Similarly, we can't use atomic ops here, since we may be handling a
 * fault caused by an atomic op access.
+ *
+ * If we find a migrating PTE while we're in an NMI context, and we're
+ * at a PC that has a registered exception handler, we don't wait,
+ * since this thread may (e.g.) have been interrupted while migrating
+ * its own stack, which would then cause us to self-deadlock.
 */
 static int handle_migrating_pte(pgd_t *pgd, int fault_num,
-				unsigned long address,
+				unsigned long address, unsigned long pc,
 				int is_kernel_mode, int write)
 {
 	pud_t *pud;
@@ -227,6 +232,8 @@ static int handle_migrating_pte(pgd_t *pgd, int fault_num,
 		pte_offset_kernel(pmd, address);
 	pteval = *pte;
 	if (pte_migrating(pteval)) {
+		if (in_nmi() && search_exception_tables(pc))
+			return 0;
 		wait_for_migration(pte);
 		return 1;
 	}
@@ -300,7 +307,7 @@ static int handle_page_fault(struct pt_regs *regs,
 	 * rather than trying to patch up the existing PTE.
 	 */
 	pgd = get_current_pgd();
-	if (handle_migrating_pte(pgd, fault_num, address,
+	if (handle_migrating_pte(pgd, fault_num, address, regs->pc,
 				 is_kernel_mode, write))
 		return 1;

@@ -665,7 +672,7 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
 	 */
 	if (fault_num == INT_DTLB_ACCESS)
 		write = 1;
-	if (handle_migrating_pte(pgd, fault_num, address, 1, write))
+	if (handle_migrating_pte(pgd, fault_num, address, pc, 1, write))
 		return state;

 	/* Return zero so that we continue on with normal fault handling. */
-- 
cgit v0.10.2

From 12400f1f22ad7651efa1d3d06dd8b6b178d19ea3 Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 15:36:53 -0400
Subject: arch/tile: don't set the homecache of a PTE unless appropriate

We make sure not to try to set the home for an MMIO PTE (on tilegx)
or a PTE that isn't referencing memory managed by Linux.
Signed-off-by: Chris Metcalf

diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 8730369..6b9a109 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -469,10 +469,18 @@ void __set_pte(pte_t *ptep, pte_t pte)

 void set_pte(pte_t *ptep, pte_t pte)
 {
-	struct page *page = pfn_to_page(pte_pfn(pte));
-
-	/* Update the home of a PTE if necessary */
-	pte = pte_set_home(pte, page_home(page));
+	if (pte_present(pte) &&
+	    (!CHIP_HAS_MMIO() || hv_pte_get_mode(pte) != HV_PTE_MODE_MMIO)) {
+		/* The PTE actually references physical memory. */
+		unsigned long pfn = pte_pfn(pte);
+		if (pfn_valid(pfn)) {
+			/* Update the home of the PTE from the struct page. */
+			pte = pte_set_home(pte, page_home(pfn_to_page(pfn)));
+		} else if (hv_pte_get_mode(pte) == 0) {
+			/* remap_pfn_range(), etc, must supply PTE mode. */
+			panic("set_pte(): out-of-range PFN and mode 0\n");
+		}
+	}

 	__set_pte(ptep, pte);
 }
-- 
cgit v0.10.2

From b230ff2d5c53bb79e1121554cd3c827e5c1b70fd Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 15:40:50 -0400
Subject: arch/tile: don't enable irqs unconditionally in page fault handler

If we took a page fault while we had interrupts disabled, we
shouldn't enable them in the page fault handler.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index a1da473..22e58f5 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -342,9 +342,12 @@ static int handle_page_fault(struct pt_regs *regs,
 	/*
 	 * If we're trying to touch user-space addresses, we must
 	 * be either at PL0, or else with interrupts enabled in the
-	 * kernel, so either way we can re-enable interrupts here.
+	 * kernel, so either way we can re-enable interrupts here
+	 * unless we are doing atomic access to user space with
+	 * interrupts disabled.
 	 */
-	local_irq_enable();
+	if (!(regs->flags & PT_FLAGS_DISABLE_IRQ))
+		local_irq_enable();

 	mm = tsk->mm;
-- 
cgit v0.10.2

From 7a7039ee71811222310b431aee246eb78dd0d401 Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 15:42:27 -0400
Subject: arch/tile: fix bug in loading kernels larger than 16 MB

Previously we only handled kernels up to a single huge page in size.
Now we create additional PTEs appropriately.
Signed-off-by: Chris Metcalf

diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index 830c490..8400d3f 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -557,6 +557,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)

 	address = MEM_SV_INTRPT;
 	pmd = get_pmd(pgtables, address);
+	pfn = 0;  /* code starts at PA 0 */
 	if (ktext_small) {
 		/* Allocate an L2 PTE for the kernel text */
 		int cpu = 0;
@@ -579,10 +580,15 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 		}
 		BUG_ON(address != (unsigned long)_stext);
-		pfn = 0;  /* code starts at PA 0 */
-		pte = alloc_pte();
-		for (pte_ofs = 0; address < (unsigned long)_einittext;
-		     pfn++, pte_ofs++, address += PAGE_SIZE) {
+		pte = NULL;
+		for (; address < (unsigned long)_einittext;
+		     pfn++, address += PAGE_SIZE) {
+			pte_ofs = pte_index(address);
+			if (pte_ofs == 0) {
+				if (pte)
+					assign_pte(pmd++, pte);
+				pte = alloc_pte();
+			}
 			if (!ktext_local) {
 				prot = set_remote_cache_cpu(prot, cpu);
 				cpu = cpumask_next(cpu, &ktext_mask);
@@ -591,7 +597,8 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 			}
 			pte[pte_ofs] = pfn_pte(pfn, prot);
 		}
-		assign_pte(pmd, pte);
+		if (pte)
+			assign_pte(pmd, pte);
 	} else {
 		pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC);
 		pteval = pte_mkhuge(pteval);
@@ -614,7 +621,9 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 		else
 			pteval = hv_pte_set_mode(pteval,
 						 HV_PTE_MODE_CACHE_NO_L3);
-		*(pte_t *)pmd = pteval;
+		for (; address < (unsigned long)_einittext;
+		     pfn += PFN_DOWN(HPAGE_SIZE), address += HPAGE_SIZE)
+			*(pte_t *)(pmd++) = pfn_pte(pfn, pteval);
 	}

 	/* Set swapper_pgprot here so it is flushed to memory right away. */
-- 
cgit v0.10.2

From 444eef1ba40546690a77b2af4cba7d4561e7bba5 Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 15:43:20 -0400
Subject: arch/tile: fix bug in delay_backoff()

We were carefully computing a value to use for the number of loops
to spin for, and then ignoring it.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/lib/spinlock_common.h b/arch/tile/lib/spinlock_common.h
index c101098..6ac3750 100644
--- a/arch/tile/lib/spinlock_common.h
+++ b/arch/tile/lib/spinlock_common.h
@@ -60,5 +60,5 @@ static void delay_backoff(int iterations)
 	loops += __insn_crc32_32(stack_pointer, get_cycles_low()) &
 		(loops - 1);

-	relax(1 << exponent);
+	relax(loops);
 }
-- 
cgit v0.10.2
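The delay_backoff() fix above restores the intended bounded exponential
backoff with jitter. A rough standalone illustration of that shape (the
cap, jitter source, and function name here are all hypothetical):

#include <linux/sched.h>
#include <linux/timex.h>

static void spin_backoff(int iterations)
{
	/* Cap the exponent so the worst-case delay stays bounded. */
	int exponent = iterations < 8 ? iterations : 8;
	unsigned int loops = 1u << exponent;

	/* Add pseudo-random jitter so contending cpus desynchronize. */
	loops += (unsigned int)get_cycles() & (loops - 1);
	while (loops--)
		cpu_relax();
}

The bug was exactly that the computed loops value, jitter included, was
discarded in favor of the raw 1 << exponent.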
From 5f220704127ae70db519fabbda4ece649eadac7f Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 15:44:10 -0400
Subject: arch/tile: don't leak kernel memory when we unload modules

We were failing to track the memory when we allocated it.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c
index b90ab99..98d4769 100644
--- a/arch/tile/kernel/module.c
+++ b/arch/tile/kernel/module.c
@@ -67,6 +67,8 @@ void *module_alloc(unsigned long size)
 	area = __get_vm_area(size, VM_ALLOC, MEM_MODULE_START, MEM_MODULE_END);
 	if (!area)
 		goto error;
+	area->nr_pages = npages;
+	area->pages = pages;

 	if (map_vm_area(area, prot_rwx, &pages)) {
 		vunmap(area->addr);
-- 
cgit v0.10.2

From 719ea79e330c5e1a17fb7e4cf352a81e4c84cff5 Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 15:50:08 -0400
Subject: arch/tile: fix up locking in pgtable.c slightly

We should be holding the init_mm.page_table_lock in shatter_huge_page()
since we are modifying the kernel page tables.  Then, only if we are
walking the other root page tables to update them, do we want to take
the pgd_lock.

Add a comment about taking the pgd_lock that we always do it with
interrupts disabled and therefore are not at risk from the tlbflush
IPI deadlock as is seen on x86.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 6b9a109..2410aa8 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -177,14 +177,10 @@ void shatter_huge_page(unsigned long addr)
 	if (!pmd_huge_page(*pmd))
 		return;

-	/*
-	 * Grab the pgd_lock, since we may need it to walk the pgd_list,
-	 * and since we need some kind of lock here to avoid races.
-	 */
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock_irqsave(&init_mm.page_table_lock, flags);
 	if (!pmd_huge_page(*pmd)) {
 		/* Lost the race to convert the huge page. */
-		spin_unlock_irqrestore(&pgd_lock, flags);
+		spin_unlock_irqrestore(&init_mm.page_table_lock, flags);
 		return;
 	}

@@ -194,6 +190,7 @@ void shatter_huge_page(unsigned long addr)

 #ifdef __PAGETABLE_PMD_FOLDED
 	/* Walk every pgd on the system and update the pmd there. */
+	spin_lock(&pgd_lock);
 	list_for_each(pos, &pgd_list) {
 		pmd_t *copy_pmd;
 		pgd = list_to_pgd(pos) + pgd_index(addr);
@@ -201,6 +198,7 @@ void shatter_huge_page(unsigned long addr)
 		copy_pmd = pmd_offset(pud, addr);
 		__set_pmd(copy_pmd, *pmd);
 	}
+	spin_unlock(&pgd_lock);
 #endif

 	/* Tell every cpu to notice the change. */
@@ -208,7 +206,7 @@ void shatter_huge_page(unsigned long addr)
 		     cpu_possible_mask, NULL, 0);

 	/* Hold the lock until the TLB flush is finished to avoid races. */
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock_irqrestore(&init_mm.page_table_lock, flags);
 }

 /*
@@ -217,9 +215,13 @@ void shatter_huge_page(unsigned long addr)
 * against pageattr.c; it is the unique case in which a valid change
 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
 * vmalloc faults work because attached pagetables are never freed.
- * The locking scheme was chosen on the basis of manfred's
- * recommendations and having no core impact whatsoever.
- * -- wli
+ *
+ * The lock is always taken with interrupts disabled, unlike on x86
+ * and other platforms, because we need to take the lock in
+ * shatter_huge_page(), which may be called from an interrupt context.
+ * We are not at risk from the tlbflush IPI deadlock that was seen on
+ * x86, since we use the flush_remote() API to have the hypervisor do
+ * the TLB flushes regardless of irq disabling.
 */
 DEFINE_SPINLOCK(pgd_lock);
 LIST_HEAD(pgd_list);
-- 
cgit v0.10.2

From bfffe79bc29a9c4c817d5f51590961220e26db1a Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 15:56:18 -0400
Subject: arch/tile: use proper memparse() for "maxmem" options

This is more standard and avoids having to remember what units the
options actually take.
Signed-off-by: Chris Metcalf

diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 023e2e1..f3598e7 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -103,13 +103,11 @@ unsigned long __initdata pci_reserve_end_pfn = -1U;

 static int __init setup_maxmem(char *str)
 {
-	long maxmem_mb;
-	if (str == NULL || strict_strtol(str, 0, &maxmem_mb) != 0 ||
-	    maxmem_mb == 0)
+	unsigned long long maxmem;
+	if (str == NULL || (maxmem = memparse(str, NULL)) == 0)
 		return -EINVAL;

-	maxmem_pfn = (maxmem_mb >> (HPAGE_SHIFT - 20)) <<
-		(HPAGE_SHIFT - PAGE_SHIFT);
+	maxmem_pfn = (maxmem >> HPAGE_SHIFT) << (HPAGE_SHIFT - PAGE_SHIFT);
 	pr_info("Forcing RAM used to no more than %dMB\n",
 		maxmem_pfn >> (20 - PAGE_SHIFT));
 	return 0;
@@ -119,14 +117,15 @@ early_param("maxmem", setup_maxmem);
 static int __init setup_maxnodemem(char *str)
 {
 	char *endp;
-	long maxnodemem_mb, node;
+	unsigned long long maxnodemem;
+	long node;

 	node = str ? simple_strtoul(str, &endp, 0) : INT_MAX;
-	if (node >= MAX_NUMNODES || *endp != ':' ||
-	    strict_strtol(endp+1, 0, &maxnodemem_mb) != 0)
+	if (node >= MAX_NUMNODES || *endp != ':')
 		return -EINVAL;

-	maxnodemem_pfn[node] = (maxnodemem_mb >> (HPAGE_SHIFT - 20)) <<
+	maxnodemem = memparse(endp+1, NULL);
+	maxnodemem_pfn[node] = (maxnodemem >> HPAGE_SHIFT) <<
 		(HPAGE_SHIFT - PAGE_SHIFT);
 	pr_info("Forcing RAM used on node %ld to no more than %dMB\n",
 		node, maxnodemem_pfn[node] >> (20 - PAGE_SHIFT));
-- 
cgit v0.10.2
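memparse() is the standard kernel helper for command-line sizes: it
parses a number with an optional K/M/G (and larger) suffix and returns
bytes. A small hypothetical illustration of the semantics the new code
relies on (not from the patch):

#include <linux/kernel.h>

static unsigned long long example_parse(void)
{
	char *rest;

	/* "512M" yields 512 << 20; parsing stops after the suffix. */
	unsigned long long bytes = memparse("512M", &rest);

	return bytes;	/* 536870912; rest points just past "512M" */
}

Note the behavior change for users: the old strict_strtol() code
treated "maxmem=512" as 512 MB, while with memparse() a bare number
means bytes, so boot arguments should state units explicitly, e.g.
"maxmem=512M".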
From 8c92ba6c327ee5089dec1e92eaa82927bee63d6d Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 15:57:18 -0400
Subject: arch/tile: add "nop" after "nap" to help GX idle power draw

This avoids the hardware istream prefetcher doing unnecessary work.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S
index 431e9ae..ec91568 100644
--- a/arch/tile/kernel/entry.S
+++ b/arch/tile/kernel/entry.S
@@ -85,6 +85,7 @@ STD_ENTRY(cpu_idle_on_new_stack)
 /* Loop forever on a nap during SMP boot. */
 STD_ENTRY(smp_nap)
 	nap
+	nop       /* avoid provoking the icache prefetch with a jump */
 	j smp_nap /* we are not architecturally guaranteed not to exit nap */
 	jrp lr    /* clue in the backtracer */
 	STD_ENDPROC(smp_nap)
@@ -105,5 +106,6 @@ STD_ENTRY(_cpu_idle)
 	.global _cpu_idle_nap
_cpu_idle_nap:
 	nap
+	nop       /* avoid provoking the icache prefetch with a jump */
 	jrp lr
 	STD_ENDPROC(_cpu_idle)
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
index a44e103..7b6df8c 100644
--- a/arch/tile/kernel/smp.c
+++ b/arch/tile/kernel/smp.c
@@ -103,7 +103,7 @@ static void smp_stop_cpu_interrupt(void)
 	set_cpu_online(smp_processor_id(), 0);
 	arch_local_irq_disable_all();
 	for (;;)
-		asm("nap");
+		asm("nap; nop");
 }

 /* This function calls the 'stop' function on all other CPUs in the system. */
-- 
cgit v0.10.2

From cb210ee3a81afab7c64777635cc18899a2bdd9a5 Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 15:59:11 -0400
Subject: arch/tile: implement panic_smp_self_stop()

This allows the later-panicking tiles to wait in a lower power state
until they get interrupted with an smp_send_stop().

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
index 7b6df8c..91da0f7 100644
--- a/arch/tile/kernel/smp.c
+++ b/arch/tile/kernel/smp.c
@@ -113,6 +113,12 @@ void smp_send_stop(void)
 	send_IPI_allbutself(MSG_TAG_STOP_CPU);
 }

+/* On panic, just wait; we may get an smp_send_stop() later on. */
+void panic_smp_self_stop(void)
+{
+	while (1)
+		asm("nap; nop");
+}

 /*
 * Dispatch code called from hv_message_intr() for HV_MSG_TILE hv messages.
-- 
cgit v0.10.2

From 2858f856021340f3730fa8639dd520a2e4331f7f Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 16:11:09 -0400
Subject: arch/tile: fix single-stepping over swint1 instructions on tilegx

If we are single-stepping and make a syscall, we call ptrace_notify()
explicitly on the return path back to user space, since we are
returning to a pc value set artificially to the next instruction, and
otherwise we won't register that we stepped over the syscall
instruction (swint1).

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 005535d..fdff17c 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include <asm/signal.h>
 #include
 #include
 #include
@@ -1039,11 +1040,25 @@ handle_syscall:

 	/* Do syscall trace again, if requested. */
 	ld	r30, r31
-	andi	r30, r30, _TIF_SYSCALL_TRACE
-	beqzt	r30, 1f
+	andi	r0, r30, _TIF_SYSCALL_TRACE
+	{
+	 andi	r0, r30, _TIF_SINGLESTEP
+	 beqzt	r0, 1f
+	}
 	jal	do_syscall_trace
 	FEEDBACK_REENTER(handle_syscall)
-1:	j	.Lresume_userspace   /* jump into middle of interrupt_return */
+	andi	r0, r30, _TIF_SINGLESTEP
+
+1:	beqzt	r0, 2f
+
+	/* Single stepping -- notify ptrace. */
+	{
+	 movei	r0, SIGTRAP
+	 jal	ptrace_notify
+	}
+	FEEDBACK_REENTER(handle_syscall)
+
+2:	j	.Lresume_userspace   /* jump into middle of interrupt_return */

 .Lcompat_syscall:
 	/*
-- 
cgit v0.10.2

From 918cbd38aef83de3a2516299bcb230caf59462a0 Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 16:14:40 -0400
Subject: arch/tile: fix pointer cast in cacheflush.c

Pragmatically it couldn't be wrong to cast pointers to long to compare
them (since all kernel addresses are in the top half of VA space),
but it's more correct to cast to unsigned long.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c
index 8928aac..6af2b97 100644
--- a/arch/tile/lib/cacheflush.c
+++ b/arch/tile/lib/cacheflush.c
@@ -109,7 +109,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)

 	/* Figure out how far back we need to go. */
 	base = p - (step_size * (load_count - 2));
-	if ((long)base < (long)buffer)
+	if ((unsigned long)base < (unsigned long)buffer)
 		base = buffer;

 	/*
-- 
cgit v0.10.2

From e81510e0c3800dc730e0c544e3949f7bde368c2b Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Thu, 29 Mar 2012 16:19:45 -0400
Subject: arch/tile: export the page_home() function.

This avoids a bug in modules trying to use the function.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index 1cc6ae4..499f737 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -394,6 +394,7 @@ int page_home(struct page *page)
 		return pte_to_home(*virt_to_pte(NULL, kva));
 	}
 }
+EXPORT_SYMBOL(page_home);

 void homecache_change_page_home(struct page *page, int order, int home)
 {
-- 
cgit v0.10.2

From b14f21906774be181627412fed5b6b5fae2b53a2 Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Fri, 30 Mar 2012 15:29:40 -0400
Subject: arch/tile: stop mentioning the "kvm" subdirectory

It causes "make clean" to fail, for example.  Once we have KVM support
complete, we'll reinstate the subdir reference.
Signed-off-by: Chris Metcalf

diff --git a/arch/tile/Makefile b/arch/tile/Makefile
index 5e4d3b9..9520bc5 100644
--- a/arch/tile/Makefile
+++ b/arch/tile/Makefile
@@ -54,8 +54,6 @@ libs-y += $(LIBGCC_PATH)
 # See arch/tile/Kbuild for content of core part of the kernel
 core-y += arch/tile/

-core-$(CONFIG_KVM) += arch/tile/kvm/
-
 ifdef TILERA_ROOT
 INSTALL_PATH ?= $(TILERA_ROOT)/tile/boot
 endif
-- 
cgit v0.10.2

From ab306cae660e524edbeb8889e4e23d3c97717b9c Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Fri, 30 Mar 2012 15:46:29 -0400
Subject: arch/tile: use atomic exchange in arch_write_unlock()

This idiom is used elsewhere when we do an unlock by writing a zero,
but I missed it here.  Using an atomic operation avoids waiting on
the write buffer for the unlocking write to be sent to the home cache.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h
index 72be590..5f8b6a0 100644
--- a/arch/tile/include/asm/spinlock_64.h
+++ b/arch/tile/include/asm/spinlock_64.h
@@ -137,7 +137,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
 static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
 	__insn_mf();
-	rw->lock = 0;
+	__insn_exch4(&rw->lock, 0);  /* Avoid waiting in the write buffer. */
 }

 static inline int arch_read_trylock(arch_rwlock_t *rw)
-- 
cgit v0.10.2

From 54229ff359250ce7292dbeb59f157a2d3b67e30c Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Fri, 30 Mar 2012 15:47:38 -0400
Subject: arch/tile: fix finv_buffer_remote() for tilegx

There were some correctness issues with this code that are now fixed
with this change.  The change is likely less performant than it could
be, but it should no longer be vulnerable to any races with memory
operations on the memory network while invalidating a range of memory.
This code is run infrequently so performance isn't critical, but
correctness definitely is.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c
index 6af2b97..db4fb89 100644
--- a/arch/tile/lib/cacheflush.c
+++ b/arch/tile/lib/cacheflush.c
@@ -39,7 +39,21 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
 {
 	char *p, *base;
 	size_t step_size, load_count;
+
+	/*
+	 * On TILEPro the striping granularity is a fixed 8KB; on
+	 * TILE-Gx it is configurable, and we rely on the fact that
+	 * the hypervisor always configures maximum striping, so that
+	 * bits 9 and 10 of the PA are part of the stripe function, so
+	 * every 512 bytes we hit a striping boundary.
+	 *
+	 */
+#ifdef __tilegx__
+	const unsigned long STRIPE_WIDTH = 512;
+#else
 	const unsigned long STRIPE_WIDTH = 8192;
+#endif
+
 #ifdef __tilegx__
 	/*
 	 * On TILE-Gx, we must disable the dstream prefetcher before doing
@@ -74,7 +88,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
 	 * memory, that one load would be sufficient, but since we may
 	 * be, we also need to back up to the last load issued to
 	 * another memory controller, which would be the point where
-	 * we crossed an 8KB boundary (the granularity of striping
+	 * we crossed a "striping" boundary (the granularity of striping
 	 * across memory controllers).  Keep backing up and doing this
 	 * until we are before the beginning of the buffer, or have
 	 * hit all the controllers.
@@ -88,12 +102,22 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
 	 * every cache line on a full memory stripe on each
 	 * controller" that we simply do that, to simplify the logic.
 	 *
-	 * FIXME: See bug 9535 for some issues with this code.
+	 * On TILE-Gx the hash-for-home function is much more complex,
+	 * with the upshot being we can't readily guarantee we have
+	 * hit both entries in the 128-entry AMT that were hit by any
+	 * load in the entire range, so we just re-load them all.
+	 * With larger buffers, we may want to consider using a hypervisor
+	 * trap to issue loads directly to each hash-for-home tile for
+	 * each controller (doing it from Linux would trash the TLB).
 	 */
 	if (hfh) {
 		step_size = L2_CACHE_BYTES;
+#ifdef __tilegx__
+		load_count = (size + L2_CACHE_BYTES - 1) / L2_CACHE_BYTES;
+#else
 		load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) *
 			      (1 << CHIP_LOG_NUM_MSHIMS());
+#endif
 	} else {
 		step_size = STRIPE_WIDTH;
 		load_count = (1 << CHIP_LOG_NUM_MSHIMS());
-- 
cgit v0.10.2

From cdd8e16feba87a3fc2bb1885d36f895a2a3288bf Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Fri, 30 Mar 2012 16:24:41 -0400
Subject: arch/tile: return SIGBUS for addresses that are unaligned AND invalid

Previously we were returning SIGSEGV in this case.  It seems cleaner
to return SIGBUS since the hardware figures out alignment traps
before TLB violations, so SIGBUS is the "more correct" signal.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c
index bc1eb58..9efbc13 100644
--- a/arch/tile/kernel/single_step.c
+++ b/arch/tile/kernel/single_step.c
@@ -153,6 +153,25 @@ static tile_bundle_bits rewrite_load_store_unaligned(
 	if (((unsigned long)addr % size) == 0)
 		return bundle;

+	/*
+	 * Return SIGBUS with the unaligned address, if requested.
+	 * Note that we return SIGBUS even for completely invalid addresses
+	 * as long as they are in fact unaligned; this matches what the
+	 * tilepro hardware would be doing, if it could provide us with the
+	 * actual bad address in an SPR, which it doesn't.
+	 */
+	if (unaligned_fixup == 0) {
+		siginfo_t info = {
+			.si_signo = SIGBUS,
+			.si_code = BUS_ADRALN,
+			.si_addr = addr
+		};
+		trace_unhandled_signal("unaligned trap", regs,
+				       (unsigned long)addr, SIGBUS);
+		force_sig_info(info.si_signo, &info, current);
+		return (tilepro_bundle_bits) 0;
+	}
+
 #ifndef __LITTLE_ENDIAN
 # error We assume little-endian representation with copy_xx_user size 2 here
 #endif
@@ -192,18 +211,6 @@ static tile_bundle_bits rewrite_load_store_unaligned(
 		return (tile_bundle_bits) 0;
 	}

-	if (unaligned_fixup == 0) {
-		siginfo_t info = {
-			.si_signo = SIGBUS,
-			.si_code = BUS_ADRALN,
-			.si_addr = addr
-		};
-		trace_unhandled_signal("unaligned trap", regs,
-				       (unsigned long)addr, SIGBUS);
-		force_sig_info(info.si_signo, &info, current);
-		return (tile_bundle_bits) 0;
-	}
-
 	if (unaligned_printk || unaligned_fixup_count == 0) {
 		pr_info("Process %d/%s: PC %#lx: Fixup of"
 			" unaligned %s at %#lx.\n",
-- 
cgit v0.10.2

From b1760c847ff9d04fba7cdbef005a0ad805311c6d Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Fri, 30 Mar 2012 16:27:20 -0400
Subject: arch/tile: remove bogus performance optimization

We were re-homing the initial task's kernel stack on the boot cpu,
but in fact it's better to let it stay globally homed, since that
task isn't bound to the boot cpu anyway.  This is more of a general
cleanup than an actual performance optimization, but it removes code,
which is a good thing. :-)
Signed-off-by: Chris Metcalf

diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index 8400d3f..6a9d20d 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -254,11 +254,6 @@ static pgprot_t __init init_pgprot(ulong address)
 		return construct_pgprot(PAGE_KERNEL_RO, PAGE_HOME_IMMUTABLE);
 	}

-	/* As a performance optimization, keep the boot init stack here. */
-	if (address >= (ulong)&init_thread_union &&
-	    address < (ulong)&init_thread_union + THREAD_SIZE)
-		return construct_pgprot(PAGE_KERNEL, smp_processor_id());
-
 #ifndef __tilegx__
 #if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
 	/* Force the atomic_locks[] array page to be hash-for-home. */
-- 
cgit v0.10.2

From e1d5c0195075abaa45cd04ca397dbeaa0d18c490 Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Fri, 30 Mar 2012 16:29:06 -0400
Subject: arch/tile: avoid accidentally unmasking NMI-type interrupts

The return path as we reload registers and core state requires that
r30 hold a boolean indicating whether we are returning from an NMI,
but in a couple of cases we weren't setting this properly, with the
result that we could accidentally unmask the NMI interrupt(s), which
could cause confusion.  Now we set r30 in every place where we jump
into the interrupt return path.

Signed-off-by: Chris Metcalf

diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index aecc8ed..5d56a1e 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -799,6 +799,10 @@ handle_interrupt:
 * This routine takes a boolean in r30 indicating if this is an NMI.
* If so, we also expect a boolean in r31 indicating whether to * re-enable the oprofile interrupts. + * + * Note that .Lresume_userspace is jumped to directly in several + * places, and we need to make sure r30 is set correctly in those + * callers as well. */ STD_ENTRY(interrupt_return) /* If we're resuming to kernel space, don't check thread flags. */ @@ -1058,7 +1062,10 @@ handle_syscall: } FEEDBACK_REENTER(handle_syscall) -2: j .Lresume_userspace /* jump into middle of interrupt_return */ +2: { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } .Lcompat_syscall: /* @@ -1092,7 +1099,10 @@ handle_syscall: movei r28, -ENOSYS } st r29, r28 - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(handle_syscall) /* Return the address for oprofile to suppress in backtraces. */ @@ -1108,7 +1118,10 @@ STD_ENTRY(ret_from_fork) jal sim_notify_fork jal schedule_tail FEEDBACK_REENTER(ret_from_fork) - j .Lresume_userspace + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(ret_from_fork) /* Various stub interrupt handlers and syscall handlers */ -- cgit v0.10.2 From e2e110d7596656e2badd21c48713bd01e1b40f44 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 30 Mar 2012 18:58:37 -0400 Subject: edac: say "TILEGx" not "TILEPro" for the tilegx edac driver This is just an aesthetic change but it was silly to say TILEPro when booting up on the tilegx architecture. Signed-off-by: Chris Metcalf diff --git a/drivers/edac/tile_edac.c b/drivers/edac/tile_edac.c index 1d5cf06..e99d009 100644 --- a/drivers/edac/tile_edac.c +++ b/drivers/edac/tile_edac.c @@ -145,7 +145,11 @@ static int __devinit tile_edac_mc_probe(struct platform_device *pdev) mci->edac_ctl_cap = EDAC_FLAG_SECDED; mci->mod_name = DRV_NAME; +#ifdef __tilegx__ + mci->ctl_name = "TILEGx_Memory_Controller"; +#else mci->ctl_name = "TILEPro_Memory_Controller"; +#endif mci->dev_name = dev_name(&pdev->dev); mci->edac_check = tile_edac_check; -- cgit v0.10.2 From 16418bb1da8a685e639c6c4f2d3096f762e8b0fb Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 30 Mar 2012 19:03:04 -0400 Subject: tile-srom.c driver: minor code cleanup Signed-off-by: Chris Metcalf diff --git a/drivers/char/tile-srom.c b/drivers/char/tile-srom.c index 4dc0194..3b22a60 100644 --- a/drivers/char/tile-srom.c +++ b/drivers/char/tile-srom.c @@ -194,17 +194,17 @@ static ssize_t srom_read(struct file *filp, char __user *buf, hv_retval = _srom_read(srom->hv_devhdl, kernbuf, *f_pos, bytes_this_pass); - if (hv_retval > 0) { - if (copy_to_user(buf, kernbuf, hv_retval) != 0) { - retval = -EFAULT; - break; - } - } else if (hv_retval <= 0) { + if (hv_retval <= 0) { if (retval == 0) retval = hv_retval; break; } + if (copy_to_user(buf, kernbuf, hv_retval) != 0) { + retval = -EFAULT; + break; + } + retval += hv_retval; *f_pos += hv_retval; buf += hv_retval; -- cgit v0.10.2 From 92795672898d5ee1faa557c498c078123d20e827 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 30 Mar 2012 19:23:35 -0400 Subject: tilepro ethernet driver: fix a few minor issues This commit fixes a number of issues seen with the driver: - Improve handling of return credits to the hardware shim - Use skb_frag_size() appropriately - Fix driver so it works properly with netpoll for console over UDP Signed-off-by: Chris Metcalf diff --git a/drivers/net/ethernet/tile/tilepro.c 
b/drivers/net/ethernet/tile/tilepro.c index 261356c..75e5905 100644 --- a/drivers/net/ethernet/tile/tilepro.c +++ b/drivers/net/ethernet/tile/tilepro.c @@ -342,6 +342,21 @@ inline int __netio_fastio1(u32 fastio_index, u32 arg0) } +static void tile_net_return_credit(struct tile_net_cpu *info) +{ + struct tile_netio_queue *queue = &info->queue; + netio_queue_user_impl_t *qup = &queue->__user_part; + + /* Return four credits after every fourth packet. */ + if (--qup->__receive_credit_remaining == 0) { + u32 interval = qup->__receive_credit_interval; + qup->__receive_credit_remaining = interval; + __netio_fastio_return_credits(qup->__fastio_index, interval); + } +} + + + /* * Provide a linux buffer to LIPP. */ @@ -864,19 +879,11 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index) stats->rx_packets++; stats->rx_bytes += len; - - if (small) - info->num_needed_small_buffers++; - else - info->num_needed_large_buffers++; } - /* Return four credits after every fourth packet. */ - if (--qup->__receive_credit_remaining == 0) { - u32 interval = qup->__receive_credit_interval; - qup->__receive_credit_remaining = interval; - __netio_fastio_return_credits(qup->__fastio_index, interval); - } + /* ISSUE: It would be nice to defer this until the packet has */ + /* actually been processed. */ + tile_net_return_credit(info); /* Consume this packet. */ qup->__packet_receive_read = index2; @@ -1543,7 +1550,7 @@ static int tile_net_drain_lipp_buffers(struct tile_net_priv *priv) /* Drain all the LIPP buffers. */ while (true) { - int buffer; + unsigned int buffer; /* NOTE: This should never fail. */ if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&buffer, @@ -1707,7 +1714,7 @@ static unsigned int tile_net_tx_frags(lepp_frag_t *frags, if (!hash_default) { void *va = pfn_to_kaddr(pfn) + f->page_offset; BUG_ON(PageHighMem(skb_frag_page(f))); - finv_buffer_remote(va, f->size, 0); + finv_buffer_remote(va, skb_frag_size(f), 0); } cpa = ((phys_addr_t)pfn << PAGE_SHIFT) + f->page_offset; @@ -1735,8 +1742,8 @@ static unsigned int tile_net_tx_frags(lepp_frag_t *frags, * Sometimes, if "sendfile()" requires copying, we will be called with * "data" containing the header and payload, with "frags" being empty. * - * In theory, "sh->nr_frags" could be 3, but in practice, it seems - * that this will never actually happen. + * Sometimes, for example when using NFS over TCP, a single segment can + * span 3 fragments, which must be handled carefully in LEPP. * * See "emulate_large_send_offload()" for some reference code, which * does not handle checksumming. @@ -1844,10 +1851,8 @@ static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev) spin_lock_irqsave(&priv->eq_lock, irqflags); - /* - * Handle completions if needed to make room. - * HACK: Spin until there is sufficient room. - */ + /* Handle completions if needed to make room. */ + /* NOTE: Return NETDEV_TX_BUSY if there is still no room. */ if (lepp_num_free_comp_slots(eq) == 0) { nolds = tile_net_lepp_grab_comps(eq, olds, wanted, 0); if (nolds == 0) { @@ -1861,6 +1866,7 @@ busy: cmd_tail = eq->cmd_tail; /* Prepare to advance, detecting full queue. */ + /* NOTE: Return NETDEV_TX_BUSY if the queue is full. */ cmd_next = cmd_tail + cmd_size; if (cmd_tail < cmd_head && cmd_next >= cmd_head) goto busy; @@ -2023,10 +2029,8 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev) spin_lock_irqsave(&priv->eq_lock, irqflags); - /* - * Handle completions if needed to make room. - * HACK: Spin until there is sufficient room. 
- */ + /* Handle completions if needed to make room. */ + /* NOTE: Return NETDEV_TX_BUSY if there is still no room. */ if (lepp_num_free_comp_slots(eq) == 0) { nolds = tile_net_lepp_grab_comps(eq, olds, wanted, 0); if (nolds == 0) { @@ -2040,6 +2044,7 @@ busy: cmd_tail = eq->cmd_tail; /* Copy the commands, or fail. */ + /* NOTE: Return NETDEV_TX_BUSY if the queue is full. */ for (i = 0; i < num_frags; i++) { /* Prepare to advance, detecting full queue. */ @@ -2261,6 +2266,23 @@ static int tile_net_get_mac(struct net_device *dev) return 0; } + +#ifdef CONFIG_NET_POLL_CONTROLLER +/* + * Polling 'interrupt' - used by things like netconsole to send skbs + * without having to re-enable interrupts. It's not called while + * the interrupt routine is executing. + */ +static void tile_net_netpoll(struct net_device *dev) +{ + struct tile_net_priv *priv = netdev_priv(dev); + disable_percpu_irq(priv->intr_id); + tile_net_handle_ingress_interrupt(priv->intr_id, dev); + enable_percpu_irq(priv->intr_id, 0); +} +#endif + + static const struct net_device_ops tile_net_ops = { .ndo_open = tile_net_open, .ndo_stop = tile_net_stop, @@ -2269,7 +2291,10 @@ static const struct net_device_ops tile_net_ops = { .ndo_get_stats = tile_net_get_stats, .ndo_change_mtu = tile_net_change_mtu, .ndo_tx_timeout = tile_net_tx_timeout, - .ndo_set_mac_address = tile_net_set_mac_address + .ndo_set_mac_address = tile_net_set_mac_address, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = tile_net_netpoll, +#endif }; @@ -2409,7 +2434,7 @@ static void tile_net_cleanup(void) */ static int tile_net_init_module(void) { - pr_info("Tilera IPP Net Driver\n"); + pr_info("Tilera Network Driver\n"); tile_net_devs[0] = tile_net_dev_init("xgbe0"); tile_net_devs[1] = tile_net_dev_init("xgbe1"); -- cgit v0.10.2 From 91445c72cac7cb48f383fc6e44221e244cc32f6e Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Sat, 31 Mar 2012 09:46:03 -0400 Subject: MAINTAINERS: update EDAC information The bluesmoke mailing list no longer works, so use linux-edac@vger.kernel.org. And, use a less restrictive pattern so all drivers/edac changes go to linux-edac as well. Borislav suggested I just push this through the tile tree since there is currently no core edac maintainer (emails to Doug Thompson bounce). 
Acked-by: Borislav Petkov Signed-off-by: Chris Metcalf diff --git a/MAINTAINERS b/MAINTAINERS index eecf344..ef04eaa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2451,17 +2451,17 @@ F: fs/ecryptfs/ EDAC-CORE M: Doug Thompson -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-edac@vger.kernel.org W: bluesmoke.sourceforge.net S: Supported F: Documentation/edac.txt -F: drivers/edac/edac_* +F: drivers/edac/ F: include/linux/edac.h EDAC-AMD64 M: Doug Thompson M: Borislav Petkov -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-edac@vger.kernel.org W: bluesmoke.sourceforge.net S: Supported F: drivers/edac/amd64_edac* @@ -2469,35 +2469,35 @@ F: drivers/edac/amd64_edac* EDAC-E752X M: Mark Gross M: Doug Thompson -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-edac@vger.kernel.org W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/e752x_edac.c EDAC-E7XXX M: Doug Thompson -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-edac@vger.kernel.org W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/e7xxx_edac.c EDAC-I82443BXGX M: Tim Small -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-edac@vger.kernel.org W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i82443bxgx_edac.c EDAC-I3000 M: Jason Uhlenkott -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-edac@vger.kernel.org W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i3000_edac.c EDAC-I5000 M: Doug Thompson -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-edac@vger.kernel.org W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i5000_edac.c @@ -2526,21 +2526,21 @@ F: drivers/edac/i7core_edac.c EDAC-I82975X M: Ranganathan Desikan M: "Arvind R." -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-edac@vger.kernel.org W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i82975x_edac.c EDAC-PASEMI M: Egor Martovetsky -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-edac@vger.kernel.org W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/pasemi_edac.c EDAC-R82600 M: Tim Small -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-edac@vger.kernel.org W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/r82600_edac.c -- cgit v0.10.2 From 00a62d4bc9b9a0388abee5c5ea946b9631b149d5 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 2 Apr 2012 13:17:37 -0400 Subject: drivers/net/ethernet/tile: fix netdev_alloc_skb() bombing Commit dae2e9f430c46c29e3f771110094bd3da3625aa4 changed dev_alloc_skb() to netdev_alloc_skb(), adding a dev pointer, but erroneously used "->" instead of "." for a struct member when accessing the dev pointer. This change fixes the build breakage. Signed-off-by: Chris Metcalf diff --git a/drivers/net/ethernet/tile/tilepro.c b/drivers/net/ethernet/tile/tilepro.c index 75e5905..3d501ec 100644 --- a/drivers/net/ethernet/tile/tilepro.c +++ b/drivers/net/ethernet/tile/tilepro.c @@ -448,7 +448,7 @@ static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info, struct sk_buff **skb_ptr; /* Request 96 extra bytes for alignment purposes. */ - skb = netdev_alloc_skb(info->napi->dev, len + padding); + skb = netdev_alloc_skb(info->napi.dev, len + padding); if (skb == NULL) return false; -- cgit v0.10.2
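
The one-character fix in the last commit above is easier to see with the types written out. Here is a minimal stand-alone sketch; the struct layouts are simplified, hypothetical stand-ins rather than the driver's real definitions, but they show why an embedded member needs "." while a pointer member needs "->":

#include <stdio.h>

/* Simplified stand-ins for the kernel structures: the real driver
 * embeds a struct napi_struct by value in its per-cpu state. */
struct net_device { const char *name; };
struct napi_struct { struct net_device *dev; };   /* "dev" is a pointer member */
struct tile_net_cpu { struct napi_struct napi; }; /* "napi" is embedded by value */

int main(void)
{
	struct net_device dev = { .name = "xgbe0" };
	struct tile_net_cpu cpu = { .napi = { .dev = &dev } };
	struct tile_net_cpu *info = &cpu;

	/* Correct: "." reaches the embedded napi member, then "->"
	 * follows its dev pointer. */
	printf("%s\n", info->napi.dev->name);

	/* The broken form, info->napi->dev, does not compile, since
	 * info->napi is a struct rather than a pointer -- hence the
	 * build breakage the commit above repairs. */
	return 0;
}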