From a0dce7f0ac66cdd5b653a3b059eb1382c2bdf8a0 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 14 Nov 2013 14:30:39 -0800 Subject: drivers/memstick/core/ms_block.c: fix spelling of MSB_RP_RECIVE_STATUS_REG Cc: Maxim Levitsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index 9188ef5..24f2f84 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -401,10 +401,10 @@ again: sizeof(struct ms_status_register))) return 0; - msb->state = MSB_RP_RECIVE_STATUS_REG; + msb->state = MSB_RP_RECEIVE_STATUS_REG; return 0; - case MSB_RP_RECIVE_STATUS_REG: + case MSB_RP_RECEIVE_STATUS_REG: msb->regs.status = *(struct ms_status_register *)mrq->data; msb->state = MSB_RP_SEND_OOB_READ; /* fallthrough */ diff --git a/drivers/memstick/core/ms_block.h b/drivers/memstick/core/ms_block.h index 96e6375..c75198d 100644 --- a/drivers/memstick/core/ms_block.h +++ b/drivers/memstick/core/ms_block.h @@ -223,7 +223,7 @@ enum msb_readpage_states { MSB_RP_RECEIVE_INT_REQ_RESULT, MSB_RP_SEND_READ_STATUS_REG, - MSB_RP_RECIVE_STATUS_REG, + MSB_RP_RECEIVE_STATUS_REG, MSB_RP_SEND_OOB_READ, MSB_RP_RECEIVE_OOB_READ, -- cgit v0.10.2 From b77d88d493b8fc7a4c2dadd3bb86d1dee2f53a56 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:30:40 -0800 Subject: mm: drop actor argument of do_generic_file_read() There's only one caller of do_generic_file_read() and the only actor is file_read_actor(). No reason to have a callback parameter. Signed-off-by: Kirill A. 
Shutemov Acked-by: Dave Hansen Reviewed-by: Wanpeng Li Cc: Matthew Wilcox Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/filemap.c b/mm/filemap.c index ae4846f..b7749a9 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1090,7 +1090,6 @@ static void shrink_readahead_size_eio(struct file *filp, * @filp: the file to read * @ppos: current file position * @desc: read_descriptor - * @actor: read method * * This is a generic file read routine, and uses the * mapping->a_ops->readpage() function for the actual low-level stuff. @@ -1099,7 +1098,7 @@ static void shrink_readahead_size_eio(struct file *filp, * of the logic when it comes to error handling etc. */ static void do_generic_file_read(struct file *filp, loff_t *ppos, - read_descriptor_t *desc, read_actor_t actor) + read_descriptor_t *desc) { struct address_space *mapping = filp->f_mapping; struct inode *inode = mapping->host; @@ -1200,13 +1199,14 @@ page_ok: * Ok, we have the page, and it's up-to-date, so * now we can copy it to user space... * - * The actor routine returns how many bytes were actually used.. + * The file_read_actor routine returns how many bytes were + * actually used.. * NOTE! This may not be the same as how much of a user buffer * we filled up (we may be padding etc), so we can only update * "pos" here (the actor routine has to update the user buffer * pointers and the remaining count). 
*/ - ret = actor(desc, page, offset, nr); + ret = file_read_actor(desc, page, offset, nr); offset += ret; index += offset >> PAGE_CACHE_SHIFT; offset &= ~PAGE_CACHE_MASK; @@ -1479,7 +1479,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, if (desc.count == 0) continue; desc.error = 0; - do_generic_file_read(filp, ppos, &desc, file_read_actor); + do_generic_file_read(filp, ppos, &desc); retval += desc.written; if (desc.error) { retval = retval ?: desc.error; -- cgit v0.10.2 From e9bb18c7b95d4dcf8c7f0e14f920ca6f03109e75 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:30:42 -0800 Subject: mm: avoid increase sizeof(struct page) due to split page table lock Alex Thorlton noticed that some massively threaded workloads work poorly, if THP enabled. This patchset fixes this by introducing split page table lock for PMD tables. hugetlbfs is not covered yet. This patchset is based on work by Naoya Horiguchi. : akpm result summary: : : THP off, v3.12-rc2: 18.059261877 seconds time elapsed : THP off, patched: 16.768027318 seconds time elapsed : : THP on, v3.12-rc2: 42.162306788 seconds time elapsed : THP on, patched: 8.397885779 seconds time elapsed : : HUGETLB, v3.12-rc2: 47.574936948 seconds time elapsed : HUGETLB, patched: 19.447481153 seconds time elapsed THP off, v3.12-rc2: ------------------- Performance counter stats for './thp_memscale -c 80 -b 512m' (5 runs): 1037072.835207 task-clock # 57.426 CPUs utilized ( +- 3.59% ) 95,093 context-switches # 0.092 K/sec ( +- 3.93% ) 140 cpu-migrations # 0.000 K/sec ( +- 5.28% ) 10,000,550 page-faults # 0.010 M/sec ( +- 0.00% ) 2,455,210,400,261 cycles # 2.367 GHz ( +- 3.62% ) [83.33%] 2,429,281,882,056 stalled-cycles-frontend # 98.94% frontend cycles idle ( +- 3.67% ) [83.33%] 1,975,960,019,659 stalled-cycles-backend # 80.48% backend cycles idle ( +- 3.88% ) [66.68%] 46,503,296,013 instructions # 0.02 insns per cycle # 52.24 stalled cycles per insn ( +- 3.21% ) [83.34%] 
9,278,997,542 branches # 8.947 M/sec ( +- 4.00% ) [83.34%] 89,881,640 branch-misses # 0.97% of all branches ( +- 1.17% ) [83.33%] 18.059261877 seconds time elapsed ( +- 2.65% ) THP on, v3.12-rc2: ------------------ Performance counter stats for './thp_memscale -c 80 -b 512m' (5 runs): 3114745.395974 task-clock # 73.875 CPUs utilized ( +- 1.84% ) 267,356 context-switches # 0.086 K/sec ( +- 1.84% ) 99 cpu-migrations # 0.000 K/sec ( +- 1.40% ) 58,313 page-faults # 0.019 K/sec ( +- 0.28% ) 7,416,635,817,510 cycles # 2.381 GHz ( +- 1.83% ) [83.33%] 7,342,619,196,993 stalled-cycles-frontend # 99.00% frontend cycles idle ( +- 1.88% ) [83.33%] 6,267,671,641,967 stalled-cycles-backend # 84.51% backend cycles idle ( +- 2.03% ) [66.67%] 117,819,935,165 instructions # 0.02 insns per cycle # 62.32 stalled cycles per insn ( +- 4.39% ) [83.34%] 28,899,314,777 branches # 9.278 M/sec ( +- 4.48% ) [83.34%] 71,787,032 branch-misses # 0.25% of all branches ( +- 1.03% ) [83.33%] 42.162306788 seconds time elapsed ( +- 1.73% ) HUGETLB, v3.12-rc2: ------------------- Performance counter stats for './thp_memscale_hugetlbfs -c 80 -b 512M' (5 runs): 2588052.787264 task-clock # 54.400 CPUs utilized ( +- 3.69% ) 246,831 context-switches # 0.095 K/sec ( +- 4.15% ) 138 cpu-migrations # 0.000 K/sec ( +- 5.30% ) 21,027 page-faults # 0.008 K/sec ( +- 0.01% ) 6,166,666,307,263 cycles # 2.383 GHz ( +- 3.68% ) [83.33%] 6,086,008,929,407 stalled-cycles-frontend # 98.69% frontend cycles idle ( +- 3.77% ) [83.33%] 5,087,874,435,481 stalled-cycles-backend # 82.51% backend cycles idle ( +- 4.41% ) [66.67%] 133,782,831,249 instructions # 0.02 insns per cycle # 45.49 stalled cycles per insn ( +- 4.30% ) [83.34%] 34,026,870,541 branches # 13.148 M/sec ( +- 4.24% ) [83.34%] 68,670,942 branch-misses # 0.20% of all branches ( +- 3.26% ) [83.33%] 47.574936948 seconds time elapsed ( +- 2.09% ) THP off, patched: ----------------- Performance counter stats for './thp_memscale -c 80 -b 512m' (5 runs): 943301.957892 
task-clock # 56.256 CPUs utilized ( +- 3.01% ) 86,218 context-switches # 0.091 K/sec ( +- 3.17% ) 121 cpu-migrations # 0.000 K/sec ( +- 6.64% ) 10,000,551 page-faults # 0.011 M/sec ( +- 0.00% ) 2,230,462,457,654 cycles # 2.365 GHz ( +- 3.04% ) [83.32%] 2,204,616,385,805 stalled-cycles-frontend # 98.84% frontend cycles idle ( +- 3.09% ) [83.32%] 1,778,640,046,926 stalled-cycles-backend # 79.74% backend cycles idle ( +- 3.47% ) [66.69%] 45,995,472,617 instructions # 0.02 insns per cycle # 47.93 stalled cycles per insn ( +- 2.51% ) [83.34%] 9,179,700,174 branches # 9.731 M/sec ( +- 3.04% ) [83.35%] 89,166,529 branch-misses # 0.97% of all branches ( +- 1.45% ) [83.33%] 16.768027318 seconds time elapsed ( +- 2.47% ) THP on, patched: ---------------- Performance counter stats for './thp_memscale -c 80 -b 512m' (5 runs): 458793.837905 task-clock # 54.632 CPUs utilized ( +- 0.79% ) 41,831 context-switches # 0.091 K/sec ( +- 0.97% ) 98 cpu-migrations # 0.000 K/sec ( +- 1.66% ) 57,829 page-faults # 0.126 K/sec ( +- 0.62% ) 1,077,543,336,716 cycles # 2.349 GHz ( +- 0.81% ) [83.33%] 1,067,403,802,964 stalled-cycles-frontend # 99.06% frontend cycles idle ( +- 0.87% ) [83.33%] 864,764,616,143 stalled-cycles-backend # 80.25% backend cycles idle ( +- 0.73% ) [66.68%] 16,129,177,440 instructions # 0.01 insns per cycle # 66.18 stalled cycles per insn ( +- 7.94% ) [83.35%] 3,618,938,569 branches # 7.888 M/sec ( +- 8.46% ) [83.36%] 33,242,032 branch-misses # 0.92% of all branches ( +- 2.02% ) [83.32%] 8.397885779 seconds time elapsed ( +- 0.18% ) HUGETLB, patched: ----------------- Performance counter stats for './thp_memscale_hugetlbfs -c 80 -b 512M' (5 runs): 395353.076837 task-clock # 20.329 CPUs utilized ( +- 8.16% ) 55,730 context-switches # 0.141 K/sec ( +- 5.31% ) 138 cpu-migrations # 0.000 K/sec ( +- 4.24% ) 21,027 page-faults # 0.053 K/sec ( +- 0.00% ) 930,219,717,244 cycles # 2.353 GHz ( +- 8.21% ) [83.32%] 914,295,694,103 stalled-cycles-frontend # 98.29% frontend cycles 
idle ( +- 8.35% ) [83.33%] 704,137,950,187 stalled-cycles-backend # 75.70% backend cycles idle ( +- 9.16% ) [66.69%] 30,541,538,385 instructions # 0.03 insns per cycle # 29.94 stalled cycles per insn ( +- 3.98% ) [83.35%] 8,415,376,631 branches # 21.286 M/sec ( +- 3.61% ) [83.36%] 32,645,478 branch-misses # 0.39% of all branches ( +- 3.41% ) [83.32%] 19.447481153 seconds time elapsed ( +- 2.00% ) This patch (of 11): CONFIG_GENERIC_LOCKBREAK increases sizeof(spinlock_t) to 8 bytes. This increases sizeof(struct page) by 4 bytes on 32-bit systems if the split page table lock is in use, since page->ptl shares space in a union with longs and pointers. Let's disable split page table lock on 32-bit systems with GENERIC_LOCKBREAK enabled. Signed-off-by: Kirill A. Shutemov Cc: Alex Thorlton Cc: Ingo Molnar Cc: Naoya Horiguchi Cc: "Eric W . Biederman" Cc: "Paul E . McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/Kconfig b/mm/Kconfig index 3f4ffda..c28d247 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -219,6 +219,7 @@ config SPLIT_PTLOCK_CPUS default "999999" if ARM && !CPU_CACHE_VIPT default "999999" if PARISC && !PA20 default "999999" if DEBUG_SPINLOCK || DEBUG_LOCK_ALLOC + default "999999" if !64BIT && GENERIC_LOCKBREAK default "4" # -- cgit v0.10.2 From 57c1ffcefb5acb3c8b5f8436c325a6bdbd8e9c78 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:30:45 -0800 Subject: mm: rename USE_SPLIT_PTLOCKS to USE_SPLIT_PTE_PTLOCKS We're going to introduce split page table lock for PMD level. Let's rename existing split ptlock for PTE level to avoid confusion. Signed-off-by: Kirill A. 
Shutemov Tested-by: Alex Thorlton Cc: Ingo Molnar Cc: Naoya Horiguchi Cc: "Eric W . Biederman" Cc: "Paul E . McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index 2a5907b..ff379ac 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -65,7 +65,7 @@ static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address, return ret; } -#if USE_SPLIT_PTLOCKS +#if USE_SPLIT_PTE_PTLOCKS /* * If we are using split PTE locks, then we need to take the page * lock here. Otherwise we are using shared mm->page_table_lock @@ -84,10 +84,10 @@ static inline void do_pte_unlock(spinlock_t *ptl) { spin_unlock(ptl); } -#else /* !USE_SPLIT_PTLOCKS */ +#else /* !USE_SPLIT_PTE_PTLOCKS */ static inline void do_pte_lock(spinlock_t *ptl) {} static inline void do_pte_unlock(spinlock_t *ptl) {} -#endif /* USE_SPLIT_PTLOCKS */ +#endif /* USE_SPLIT_PTE_PTLOCKS */ static int adjust_pte(struct vm_area_struct *vma, unsigned long address, unsigned long pfn) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index fdc3ba2..455c873 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -796,7 +796,7 @@ static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm) { spinlock_t *ptl = NULL; -#if USE_SPLIT_PTLOCKS +#if USE_SPLIT_PTE_PTLOCKS ptl = __pte_lockptr(page); spin_lock_nest_lock(ptl, &mm->page_table_lock); #endif @@ -1637,7 +1637,7 @@ static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, __set_pfn_prot(pfn, PAGE_KERNEL_RO); - if (level == PT_PTE && USE_SPLIT_PTLOCKS) + if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) 
__pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); xen_mc_issue(PARAVIRT_LAZY_MMU); @@ -1671,7 +1671,7 @@ static inline void xen_release_ptpage(unsigned long pfn, unsigned level) if (!PageHighMem(page)) { xen_mc_batch(); - if (level == PT_PTE && USE_SPLIT_PTLOCKS) + if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); __set_pfn_prot(pfn, PAGE_KERNEL); diff --git a/include/linux/mm.h b/include/linux/mm.h index 42a35d9..dc3333d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1316,7 +1316,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a } #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ -#if USE_SPLIT_PTLOCKS +#if USE_SPLIT_PTE_PTLOCKS /* * We tuck a spinlock to guard each pagetable page into its struct page, * at page->private, with BUILD_BUG_ON to make sure that this will not @@ -1329,14 +1329,14 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a } while (0) #define pte_lock_deinit(page) ((page)->mapping = NULL) #define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));}) -#else /* !USE_SPLIT_PTLOCKS */ +#else /* !USE_SPLIT_PTE_PTLOCKS */ /* * We use mm->page_table_lock to guard all pagetable pages of the mm. 
*/ #define pte_lock_init(page) do {} while (0) #define pte_lock_deinit(page) do {} while (0) #define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) -#endif /* USE_SPLIT_PTLOCKS */ +#endif /* USE_SPLIT_PTE_PTLOCKS */ static inline void pgtable_page_ctor(struct page *page) { diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index a3198e5..f1ff66d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -23,7 +23,7 @@ struct address_space; -#define USE_SPLIT_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) +#define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) /* * Each physical page in the system has a struct page associated with @@ -141,7 +141,7 @@ struct page { * indicates order in the buddy * system if PG_buddy is set. */ -#if USE_SPLIT_PTLOCKS +#if USE_SPLIT_PTE_PTLOCKS spinlock_t ptl; #endif struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */ @@ -309,14 +309,14 @@ enum { NR_MM_COUNTERS }; -#if USE_SPLIT_PTLOCKS && defined(CONFIG_MMU) +#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU) #define SPLIT_RSS_COUNTING /* per-thread cached information, */ struct task_rss_stat { int events; /* for synchronization threshold */ int count[NR_MM_COUNTERS]; }; -#endif /* USE_SPLIT_PTLOCKS */ +#endif /* USE_SPLIT_PTE_PTLOCKS */ struct mm_rss_stat { atomic_long_t count[NR_MM_COUNTERS]; -- cgit v0.10.2 From e1f56c89b040134add93f686931cc266541d239a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:30:48 -0800 Subject: mm: convert mm->nr_ptes to atomic_long_t With split page table lock for PMD level we can't hold mm->page_table_lock while updating nr_ptes. Let's convert it to atomic_long_t to avoid races. Signed-off-by: Kirill A. Shutemov Tested-by: Alex Thorlton Cc: Ingo Molnar Cc: Naoya Horiguchi Cc: "Eric W . Biederman" Cc: "Paul E . 
McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index abbe825..8faaebd 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -62,7 +62,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) total_rss << (PAGE_SHIFT-10), data << (PAGE_SHIFT-10), mm->stack_vm << (PAGE_SHIFT-10), text, lib, - (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10, + (PTRS_PER_PTE * sizeof(pte_t) * + atomic_long_read(&mm->nr_ptes)) >> 10, swap << (PAGE_SHIFT-10)); } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f1ff66d..566df57 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -339,6 +339,7 @@ struct mm_struct { pgd_t * pgd; atomic_t mm_users; /* How many users with user space? */ atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ + atomic_long_t nr_ptes; /* Page table pages */ int map_count; /* number of VMAs */ spinlock_t page_table_lock; /* Protects page tables and some counters */ @@ -360,7 +361,6 @@ struct mm_struct { unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */ unsigned long stack_vm; /* VM_GROWSUP/DOWN */ unsigned long def_flags; - unsigned long nr_ptes; /* Page table pages */ unsigned long start_code, end_code, start_data, end_data; unsigned long start_brk, brk, start_stack; unsigned long arg_start, arg_end, env_start, env_end; diff --git a/kernel/fork.c b/kernel/fork.c index f6d11fc..e252075 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -532,7 +532,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) mm->flags = (current->mm) ? 
(current->mm->flags & MMF_INIT_MASK) : default_dump_filter; mm->core_state = NULL; - mm->nr_ptes = 0; + atomic_long_set(&mm->nr_ptes, 0); memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); spin_lock_init(&mm->page_table_lock); mm_init_aio(mm); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 0556c6a..e5b2d31 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -738,7 +738,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, haddr, pmd, entry); add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); - mm->nr_ptes++; + atomic_long_inc(&mm->nr_ptes); spin_unlock(&mm->page_table_lock); } @@ -771,7 +771,7 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, entry = pmd_mkhuge(entry); pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, haddr, pmd, entry); - mm->nr_ptes++; + atomic_long_inc(&mm->nr_ptes); return true; } @@ -896,7 +896,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd = pmd_mkold(pmd_wrprotect(pmd)); pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable); set_pmd_at(dst_mm, addr, dst_pmd, pmd); - dst_mm->nr_ptes++; + atomic_long_inc(&dst_mm->nr_ptes); ret = 0; out_unlock: @@ -1392,7 +1392,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, tlb_remove_pmd_tlb_entry(tlb, pmd, addr); pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd); if (is_huge_zero_pmd(orig_pmd)) { - tlb->mm->nr_ptes--; + atomic_long_dec(&tlb->mm->nr_ptes); spin_unlock(&tlb->mm->page_table_lock); put_huge_zero_page(); } else { @@ -1401,7 +1401,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, VM_BUG_ON(page_mapcount(page) < 0); add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); VM_BUG_ON(!PageHead(page)); - tlb->mm->nr_ptes--; + atomic_long_dec(&tlb->mm->nr_ptes); spin_unlock(&tlb->mm->page_table_lock); tlb_remove_page(tlb, page); } diff --git a/mm/memory.c b/mm/memory.c index bf86658..0b5a93a 100644 --- 
a/mm/memory.c +++ b/mm/memory.c @@ -382,7 +382,7 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, pgtable_t token = pmd_pgtable(*pmd); pmd_clear(pmd); pte_free_tlb(tlb, token, addr); - tlb->mm->nr_ptes--; + atomic_long_dec(&tlb->mm->nr_ptes); } static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, @@ -573,7 +573,7 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, spin_lock(&mm->page_table_lock); wait_split_huge_page = 0; if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ - mm->nr_ptes++; + atomic_long_inc(&mm->nr_ptes); pmd_populate(mm, pmd, new); new = NULL; } else if (unlikely(pmd_trans_splitting(*pmd))) diff --git a/mm/mmap.c b/mm/mmap.c index 5a6badd..834b2d7 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2724,7 +2724,8 @@ void exit_mmap(struct mm_struct *mm) } vm_unacct_memory(nr_accounted); - WARN_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); + WARN_ON(atomic_long_read(&mm->nr_ptes) > + (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); } /* Insert vm structure into process list sorted by address diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 6738c47..1e4a600 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -161,7 +161,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, * The baseline for the badness score is the proportion of RAM that each * task's rss, pagetable and swap space use. 
*/ - points = get_mm_rss(p->mm) + p->mm->nr_ptes + + points = get_mm_rss(p->mm) + atomic_long_read(&p->mm->nr_ptes) + get_mm_counter(p->mm, MM_SWAPENTS); task_unlock(p); @@ -364,10 +364,10 @@ static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemas continue; } - pr_info("[%5d] %5d %5d %8lu %8lu %7lu %8lu %5hd %s\n", + pr_info("[%5d] %5d %5d %8lu %8lu %7ld %8lu %5hd %s\n", task->pid, from_kuid(&init_user_ns, task_uid(task)), task->tgid, task->mm->total_vm, get_mm_rss(task->mm), - task->mm->nr_ptes, + atomic_long_read(&task->mm->nr_ptes), get_mm_counter(task->mm, MM_SWAPENTS), task->signal->oom_score_adj, task->comm); task_unlock(task); -- cgit v0.10.2 From 9a86cb7bdc4ccbe3f99a1ca275b90a322a90f9ce Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:30:51 -0800 Subject: mm: introduce api for split page table lock for PMD level Basic api, backed by mm->page_table_lock for now. Actual implementation will be added later. Signed-off-by: Naoya Horiguchi Signed-off-by: Kirill A. Shutemov Tested-by: Alex Thorlton Cc: Ingo Molnar Cc: "Eric W . Biederman" Cc: "Paul E . McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mm.h b/include/linux/mm.h index dc3333d..4f4ca41 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1378,6 +1378,19 @@ static inline void pgtable_page_dtor(struct page *page) ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? 
\ NULL: pte_offset_kernel(pmd, address)) +static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) +{ + return &mm->page_table_lock; +} + + +static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) +{ + spinlock_t *ptl = pmd_lockptr(mm, pmd); + spin_lock(ptl); + return ptl; +} + extern void free_area_init(unsigned long * zones_size); extern void free_area_init_node(int nid, unsigned long * zones_size, unsigned long zone_start_pfn, unsigned long *zholes_size); -- cgit v0.10.2 From bf929152e9f6c49b66fad4ebf08cc95b02ce48f5 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:30:54 -0800 Subject: mm, thp: change pmd_trans_huge_lock() to return taken lock With split ptlock it's important to know which lock pmd_trans_huge_lock() took. This patch adds one more parameter to the function to return the lock. In most places, migration to the new API is trivial. The exception is move_huge_pmd(): we need to take two locks if the pmd tables are different. Signed-off-by: Naoya Horiguchi Signed-off-by: Kirill A. Shutemov Tested-by: Alex Thorlton Cc: Ingo Molnar Cc: "Eric W . Biederman" Cc: "Paul E . 
McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 8faaebd..42b5cf5 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -506,9 +506,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pte_t *pte; spinlock_t *ptl; - if (pmd_trans_huge_lock(pmd, vma) == 1) { + if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk); - spin_unlock(&walk->mm->page_table_lock); + spin_unlock(ptl); mss->anonymous_thp += HPAGE_PMD_SIZE; return 0; } @@ -999,13 +999,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, { struct vm_area_struct *vma; struct pagemapread *pm = walk->private; + spinlock_t *ptl; pte_t *pte; int err = 0; pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); /* find the first VMA at or above 'addr' */ vma = find_vma(walk->mm, addr); - if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { + if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { int pmd_flags2; if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd)) @@ -1023,7 +1024,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (err) break; } - spin_unlock(&walk->mm->page_table_lock); + spin_unlock(ptl); return err; } @@ -1325,7 +1326,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, md = walk->private; - if (pmd_trans_huge_lock(pmd, md->vma) == 1) { + if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) { pte_t huge_pte = *(pte_t *)pmd; struct page *page; @@ -1333,7 +1334,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, if 
(page) gather_stats(page, md, pte_dirty(huge_pte), HPAGE_PMD_SIZE/PAGE_SIZE); - spin_unlock(&walk->mm->page_table_lock); + spin_unlock(ptl); return 0; } diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 3935428..4aca0d8 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -129,15 +129,15 @@ extern void __vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, long adjust_next); -extern int __pmd_trans_huge_lock(pmd_t *pmd, - struct vm_area_struct *vma); +extern int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, + spinlock_t **ptl); /* mmap_sem must be held on entry */ -static inline int pmd_trans_huge_lock(pmd_t *pmd, - struct vm_area_struct *vma) +static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, + spinlock_t **ptl) { VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem)); if (pmd_trans_huge(*pmd)) - return __pmd_trans_huge_lock(pmd, vma); + return __pmd_trans_huge_lock(pmd, vma, ptl); else return 0; } @@ -215,8 +215,8 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, long adjust_next) { } -static inline int pmd_trans_huge_lock(pmd_t *pmd, - struct vm_area_struct *vma) +static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, + spinlock_t **ptl) { return 0; } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e5b2d31..471eb04 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1376,9 +1376,10 @@ out: int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr) { + spinlock_t *ptl; int ret = 0; - if (__pmd_trans_huge_lock(pmd, vma) == 1) { + if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { struct page *page; pgtable_t pgtable; pmd_t orig_pmd; @@ -1393,7 +1394,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd); if (is_huge_zero_pmd(orig_pmd)) { atomic_long_dec(&tlb->mm->nr_ptes); - 
spin_unlock(&tlb->mm->page_table_lock); + spin_unlock(ptl); put_huge_zero_page(); } else { page = pmd_page(orig_pmd); @@ -1402,7 +1403,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); VM_BUG_ON(!PageHead(page)); atomic_long_dec(&tlb->mm->nr_ptes); - spin_unlock(&tlb->mm->page_table_lock); + spin_unlock(ptl); tlb_remove_page(tlb, page); } pte_free(tlb->mm, pgtable); @@ -1415,14 +1416,15 @@ int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned char *vec) { + spinlock_t *ptl; int ret = 0; - if (__pmd_trans_huge_lock(pmd, vma) == 1) { + if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { /* * All logical pages in the range are present * if backed by a huge page. */ - spin_unlock(&vma->vm_mm->page_table_lock); + spin_unlock(ptl); memset(vec, 1, (end - addr) >> PAGE_SHIFT); ret = 1; } @@ -1435,6 +1437,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long old_end, pmd_t *old_pmd, pmd_t *new_pmd) { + spinlock_t *old_ptl, *new_ptl; int ret = 0; pmd_t pmd; @@ -1455,12 +1458,21 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, goto out; } - ret = __pmd_trans_huge_lock(old_pmd, vma); + /* + * We don't have to worry about the ordering of src and dst + * ptlocks because exclusive mmap_sem prevents deadlock. 
+ */ + ret = __pmd_trans_huge_lock(old_pmd, vma, &old_ptl); if (ret == 1) { + new_ptl = pmd_lockptr(mm, new_pmd); + if (new_ptl != old_ptl) + spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); pmd = pmdp_get_and_clear(mm, old_addr, old_pmd); VM_BUG_ON(!pmd_none(*new_pmd)); set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); - spin_unlock(&mm->page_table_lock); + if (new_ptl != old_ptl) + spin_unlock(new_ptl); + spin_unlock(old_ptl); } out: return ret; @@ -1476,9 +1488,10 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa) { struct mm_struct *mm = vma->vm_mm; + spinlock_t *ptl; int ret = 0; - if (__pmd_trans_huge_lock(pmd, vma) == 1) { + if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { pmd_t entry; ret = 1; if (!prot_numa) { @@ -1507,7 +1520,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, if (ret == HPAGE_PMD_NR) set_pmd_at(mm, addr, pmd, entry); - spin_unlock(&vma->vm_mm->page_table_lock); + spin_unlock(ptl); } return ret; @@ -1520,12 +1533,13 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, * Note that if it returns 1, this routine returns without unlocking page * table locks. So callers must unlock them. 
*/ -int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) +int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, + spinlock_t **ptl) { - spin_lock(&vma->vm_mm->page_table_lock); + *ptl = pmd_lock(vma->vm_mm, pmd); if (likely(pmd_trans_huge(*pmd))) { if (unlikely(pmd_trans_splitting(*pmd))) { - spin_unlock(&vma->vm_mm->page_table_lock); + spin_unlock(*ptl); wait_split_huge_page(vma->anon_vma, pmd); return -1; } else { @@ -1534,7 +1548,7 @@ int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) return 1; } } - spin_unlock(&vma->vm_mm->page_table_lock); + spin_unlock(*ptl); return 0; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e3cd40b..f1a0ae6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6605,10 +6605,10 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd, pte_t *pte; spinlock_t *ptl; - if (pmd_trans_huge_lock(pmd, vma) == 1) { + if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE) mc.precharge += HPAGE_PMD_NR; - spin_unlock(&vma->vm_mm->page_table_lock); + spin_unlock(ptl); return 0; } @@ -6797,9 +6797,9 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, * to be unlocked in __split_huge_page_splitting(), where the main * part of thp split is not executed yet. */ - if (pmd_trans_huge_lock(pmd, vma) == 1) { + if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { if (mc.precharge < HPAGE_PMD_NR) { - spin_unlock(&vma->vm_mm->page_table_lock); + spin_unlock(ptl); return 0; } target_type = get_mctgt_type_thp(vma, addr, *pmd, &target); @@ -6816,7 +6816,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, } put_page(page); } - spin_unlock(&vma->vm_mm->page_table_lock); + spin_unlock(ptl); return 0; } -- cgit v0.10.2 From 117b0791ac42f2ec447bc864e70ad622b5604059 Mon Sep 17 00:00:00 2001 From: "Kirill A. 
Shutemov" Date: Thu, 14 Nov 2013 14:30:56 -0800 Subject: mm, thp: move ptl taking inside page_check_address_pmd() With split page table lock we can't know which lock we need to take before we find the relevant pmd. Let's move lock taking inside the function. Signed-off-by: Naoya Horiguchi Signed-off-by: Kirill A. Shutemov Tested-by: Alex Thorlton Cc: Ingo Molnar Cc: "Eric W . Biederman" Cc: "Paul E . McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 4aca0d8..91672e2 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -54,7 +54,8 @@ enum page_check_address_pmd_flag { extern pmd_t *page_check_address_pmd(struct page *page, struct mm_struct *mm, unsigned long address, - enum page_check_address_pmd_flag flag); + enum page_check_address_pmd_flag flag, + spinlock_t **ptl); #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) #define HPAGE_PMD_NR (1<vm_mm; + spinlock_t *ptl; pmd_t *pmd; int ret = 0; /* For mmu_notifiers */ @@ -1600,9 +1612,8 @@ static int __split_huge_page_splitting(struct page *page, const unsigned long mmun_end = address + HPAGE_PMD_SIZE; mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); - spin_lock(&mm->page_table_lock); pmd = page_check_address_pmd(page, mm, address, - PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG); + PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG, &ptl); if (pmd) { /* * We can't temporarily set the pmd to null in order @@ -1613,8 +1624,8 @@ static int __split_huge_page_splitting(struct page *page, */ pmdp_splitting_flush(vma, address, pmd); ret = 1; + spin_unlock(ptl); } - 
spin_unlock(&mm->page_table_lock); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); return ret; @@ -1745,14 +1756,14 @@ static int __split_huge_page_map(struct page *page, unsigned long address) { struct mm_struct *mm = vma->vm_mm; + spinlock_t *ptl; pmd_t *pmd, _pmd; int ret = 0, i; pgtable_t pgtable; unsigned long haddr; - spin_lock(&mm->page_table_lock); pmd = page_check_address_pmd(page, mm, address, - PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG); + PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG, &ptl); if (pmd) { pgtable = pgtable_trans_huge_withdraw(mm, pmd); pmd_populate(mm, &_pmd, pgtable); @@ -1807,8 +1818,8 @@ static int __split_huge_page_map(struct page *page, pmdp_invalidate(vma, address, pmd); pmd_populate(mm, pmd, pgtable); ret = 1; + spin_unlock(ptl); } - spin_unlock(&mm->page_table_lock); return ret; } diff --git a/mm/rmap.c b/mm/rmap.c index fd3ee7a..b59d741 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -665,25 +665,23 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma, unsigned long *vm_flags) { struct mm_struct *mm = vma->vm_mm; + spinlock_t *ptl; int referenced = 0; if (unlikely(PageTransHuge(page))) { pmd_t *pmd; - spin_lock(&mm->page_table_lock); /* * rmap might return false positives; we must filter * these out using page_check_address_pmd(). 
*/ pmd = page_check_address_pmd(page, mm, address, - PAGE_CHECK_ADDRESS_PMD_FLAG); - if (!pmd) { - spin_unlock(&mm->page_table_lock); + PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl); + if (!pmd) goto out; - } if (vma->vm_flags & VM_LOCKED) { - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); *mapcount = 0; /* break early from loop */ *vm_flags |= VM_LOCKED; goto out; @@ -692,10 +690,9 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma, /* go ahead even if the pmd is pmd_trans_splitting() */ if (pmdp_clear_flush_young_notify(vma, address, pmd)) referenced++; - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); } else { pte_t *pte; - spinlock_t *ptl; /* * rmap might return false positives; we must filter -- cgit v0.10.2 From c389a250ab4cfa4a3775d9f2c45271618af6d5b2 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:30:59 -0800 Subject: mm, thp: do not access mm->pmd_huge_pte directly Currently mm->pmd_huge_pte is protected by the page table lock. It will not work with the split lock. We have to have a per-pmd pmd_huge_pte for proper access serialization. For now, let's just introduce a wrapper to access mm->pmd_huge_pte. Signed-off-by: Kirill A. Shutemov Tested-by: Alex Thorlton Cc: Alex Thorlton Cc: Ingo Molnar Cc: Naoya Horiguchi Cc: "Eric W . Biederman" Cc: "Paul E .
McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 0a2e5e0..1ea18fc 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1244,11 +1244,11 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, assert_spin_locked(&mm->page_table_lock); /* FIFO */ - if (!mm->pmd_huge_pte) + if (!pmd_huge_pte(mm, pmdp)) INIT_LIST_HEAD(lh); else - list_add(lh, (struct list_head *) mm->pmd_huge_pte); - mm->pmd_huge_pte = pgtable; + list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); + pmd_huge_pte(mm, pmdp) = pgtable; } pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) @@ -1260,12 +1260,12 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) assert_spin_locked(&mm->page_table_lock); /* FIFO */ - pgtable = mm->pmd_huge_pte; + pgtable = pmd_huge_pte(mm, pmdp); lh = (struct list_head *) pgtable; if (list_empty(lh)) - mm->pmd_huge_pte = NULL; + pmd_huge_pte(mm, pmdp) = NULL; else { - mm->pmd_huge_pte = (pgtable_t) lh->next; + pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; list_del(lh); } ptep = (pte_t *) pgtable; diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 7a91f28..656cc46 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -196,11 +196,11 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, assert_spin_locked(&mm->page_table_lock); /* FIFO */ - if (!mm->pmd_huge_pte) + if (!pmd_huge_pte(mm, pmdp)) INIT_LIST_HEAD(lh); else - list_add(lh, (struct list_head *) mm->pmd_huge_pte); - mm->pmd_huge_pte = pgtable; + list_add(lh, (struct list_head *) 
pmd_huge_pte(mm, pmdp)); + pmd_huge_pte(mm, pmdp) = pgtable; } pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) @@ -211,12 +211,12 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) assert_spin_locked(&mm->page_table_lock); /* FIFO */ - pgtable = mm->pmd_huge_pte; + pgtable = pmd_huge_pte(mm, pmdp); lh = (struct list_head *) pgtable; if (list_empty(lh)) - mm->pmd_huge_pte = NULL; + pmd_huge_pte(mm, pmdp) = NULL; else { - mm->pmd_huge_pte = (pgtable_t) lh->next; + pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; list_del(lh); } pte_val(pgtable[0]) = 0; diff --git a/include/linux/mm.h b/include/linux/mm.h index 4f4ca41..861cad5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1383,6 +1383,7 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) return &mm->page_table_lock; } +#define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) { diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 3929a40..41fee3e 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -154,11 +154,11 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, assert_spin_locked(&mm->page_table_lock); /* FIFO */ - if (!mm->pmd_huge_pte) + if (!pmd_huge_pte(mm, pmdp)) INIT_LIST_HEAD(&pgtable->lru); else - list_add(&pgtable->lru, &mm->pmd_huge_pte->lru); - mm->pmd_huge_pte = pgtable; + list_add(&pgtable->lru, &pmd_huge_pte(mm, pmdp)->lru); + pmd_huge_pte(mm, pmdp) = pgtable; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif @@ -173,11 +173,11 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) assert_spin_locked(&mm->page_table_lock); /* FIFO */ - pgtable = mm->pmd_huge_pte; + pgtable = pmd_huge_pte(mm, pmdp); if (list_empty(&pgtable->lru)) - mm->pmd_huge_pte = NULL; + pmd_huge_pte(mm, pmdp) = NULL; else { - mm->pmd_huge_pte = list_entry(pgtable->lru.next, + pmd_huge_pte(mm, pmdp) = 
list_entry(pgtable->lru.next, struct page, lru); list_del(&pgtable->lru); } -- cgit v0.10.2 From cb900f41215447433cbc456d1c4294e858a84d7c Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:02 -0800 Subject: mm, hugetlb: convert hugetlbfs to use split pmd lock Hugetlb supports multiple page sizes. We use split lock only for PMD level, but not for PUD. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Naoya Horiguchi Signed-off-by: Kirill A. Shutemov Tested-by: Alex Thorlton Cc: Ingo Molnar Cc: "Eric W . Biederman" Cc: "Paul E . McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index c805d5b..a77d2b2 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -1,8 +1,8 @@ #include <linux/fs.h> -#include <linux/hugetlb.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/hugetlb.h> #include <linux/mman.h> #include <linux/mmzone.h> #include <linux/proc_fs.h> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 0393270..acd2010 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -392,6 +392,15 @@ static inline int hugepage_migration_support(struct hstate *h) return pmd_huge_support() && (huge_page_shift(h) == PMD_SHIFT); } +static inline spinlock_t *huge_pte_lockptr(struct hstate *h, + struct mm_struct *mm, pte_t *pte) +{ + if (huge_page_size(h) == PMD_SIZE) + return pmd_lockptr(mm, (pmd_t *) pte); + VM_BUG_ON(huge_page_size(h) == PAGE_SIZE); + return &mm->page_table_lock; +} + #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; #define alloc_huge_page_node(h, nid) NULL @@ -401,6 +410,7 @@ struct hstate {}; #define hstate_sizelog(s) NULL #define hstate_vma(v) NULL #define hstate_inode(i) NULL +#define
page_hstate(page) NULL #define huge_page_size(h) PAGE_SIZE #define huge_page_mask(h) PAGE_MASK #define vma_kernel_pagesize(v) PAGE_SIZE @@ -421,6 +431,22 @@ static inline pgoff_t basepage_index(struct page *page) #define dissolve_free_huge_pages(s, e) do {} while (0) #define pmd_huge_support() 0 #define hugepage_migration_support(h) 0 + +static inline spinlock_t *huge_pte_lockptr(struct hstate *h, + struct mm_struct *mm, pte_t *pte) +{ + return &mm->page_table_lock; +} #endif /* CONFIG_HUGETLB_PAGE */ +static inline spinlock_t *huge_pte_lock(struct hstate *h, + struct mm_struct *mm, pte_t *pte) +{ + spinlock_t *ptl; + + ptl = huge_pte_lockptr(h, mm, pte); + spin_lock(ptl); + return ptl; +} + #endif /* _LINUX_HUGETLB_H */ diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 8d4fa82..c0f7526 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -139,7 +139,8 @@ static inline void make_migration_entry_read(swp_entry_t *entry) extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address); -extern void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte); +extern void migration_entry_wait_huge(struct vm_area_struct *vma, + struct mm_struct *mm, pte_t *pte); #else #define make_migration_entry(page, write) swp_entry(0, 0) @@ -151,8 +152,8 @@ static inline int is_migration_entry(swp_entry_t swp) static inline void make_migration_entry_read(swp_entry_t *entryp) { } static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { } -static inline void migration_entry_wait_huge(struct mm_struct *mm, - pte_t *pte) { } +static inline void migration_entry_wait_huge(struct vm_area_struct *vma, + struct mm_struct *mm, pte_t *pte) { } static inline int is_write_migration_entry(swp_entry_t entry) { return 0; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 0b7656e..7d57af2 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2376,6 +2376,7 @@ int copy_hugetlb_page_range(struct mm_struct 
*dst, struct mm_struct *src, cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) { + spinlock_t *src_ptl, *dst_ptl; src_pte = huge_pte_offset(src, addr); if (!src_pte) continue; @@ -2387,8 +2388,9 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, if (dst_pte == src_pte) continue; - spin_lock(&dst->page_table_lock); - spin_lock_nested(&src->page_table_lock, SINGLE_DEPTH_NESTING); + dst_ptl = huge_pte_lock(h, dst, dst_pte); + src_ptl = huge_pte_lockptr(h, src, src_pte); + spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); if (!huge_pte_none(huge_ptep_get(src_pte))) { if (cow) huge_ptep_set_wrprotect(src, addr, src_pte); @@ -2398,8 +2400,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, page_dup_rmap(ptepage); set_huge_pte_at(dst, addr, dst_pte, entry); } - spin_unlock(&src->page_table_lock); - spin_unlock(&dst->page_table_lock); + spin_unlock(src_ptl); + spin_unlock(dst_ptl); } return 0; @@ -2442,6 +2444,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long address; pte_t *ptep; pte_t pte; + spinlock_t *ptl; struct page *page; struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); @@ -2455,25 +2458,25 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, tlb_start_vma(tlb, vma); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); again: - spin_lock(&mm->page_table_lock); for (address = start; address < end; address += sz) { ptep = huge_pte_offset(mm, address); if (!ptep) continue; + ptl = huge_pte_lock(h, mm, ptep); if (huge_pmd_unshare(mm, &address, ptep)) - continue; + goto unlock; pte = huge_ptep_get(ptep); if (huge_pte_none(pte)) - continue; + goto unlock; /* * HWPoisoned hugepage is already unmapped and dropped reference */ if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) { huge_pte_clear(mm, address, ptep); - continue; + goto unlock; } 
page = pte_page(pte); @@ -2484,7 +2487,7 @@ again: */ if (ref_page) { if (page != ref_page) - continue; + goto unlock; /* * Mark the VMA as having unmapped its page so that @@ -2501,13 +2504,18 @@ again: page_remove_rmap(page); force_flush = !__tlb_remove_page(tlb, page); - if (force_flush) + if (force_flush) { + spin_unlock(ptl); break; + } /* Bail out after unmapping reference page if supplied */ - if (ref_page) + if (ref_page) { + spin_unlock(ptl); break; + } +unlock: + spin_unlock(ptl); } - spin_unlock(&mm->page_table_lock); /* * mmu_gather ran out of room to batch pages, we break out of * the PTE lock to avoid doing the potential expensive TLB invalidate @@ -2613,7 +2621,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, */ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t pte, - struct page *pagecache_page) + struct page *pagecache_page, spinlock_t *ptl) { struct hstate *h = hstate_vma(vma); struct page *old_page, *new_page; @@ -2647,8 +2655,8 @@ retry_avoidcopy: page_cache_get(old_page); - /* Drop page_table_lock as buddy allocator may be called */ - spin_unlock(&mm->page_table_lock); + /* Drop page table lock as buddy allocator may be called */ + spin_unlock(ptl); new_page = alloc_huge_page(vma, address, outside_reserve); if (IS_ERR(new_page)) { @@ -2666,13 +2674,13 @@ retry_avoidcopy: BUG_ON(huge_pte_none(pte)); if (unmap_ref_private(mm, vma, old_page, address)) { BUG_ON(huge_pte_none(pte)); - spin_lock(&mm->page_table_lock); + spin_lock(ptl); ptep = huge_pte_offset(mm, address & huge_page_mask(h)); if (likely(pte_same(huge_ptep_get(ptep), pte))) goto retry_avoidcopy; /* - * race occurs while re-acquiring page_table_lock, and - * our job is done. + * race occurs while re-acquiring page table + * lock, and our job is done. 
*/ return 0; } @@ -2680,7 +2688,7 @@ retry_avoidcopy: } /* Caller expects lock to be held */ - spin_lock(&mm->page_table_lock); + spin_lock(ptl); if (err == -ENOMEM) return VM_FAULT_OOM; else @@ -2695,7 +2703,7 @@ retry_avoidcopy: page_cache_release(new_page); page_cache_release(old_page); /* Caller expects lock to be held */ - spin_lock(&mm->page_table_lock); + spin_lock(ptl); return VM_FAULT_OOM; } @@ -2707,10 +2715,10 @@ retry_avoidcopy: mmun_end = mmun_start + huge_page_size(h); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); /* - * Retake the page_table_lock to check for racing updates + * Retake the page table lock to check for racing updates * before the page tables are altered */ - spin_lock(&mm->page_table_lock); + spin_lock(ptl); ptep = huge_pte_offset(mm, address & huge_page_mask(h)); if (likely(pte_same(huge_ptep_get(ptep), pte))) { ClearPagePrivate(new_page); @@ -2724,13 +2732,13 @@ retry_avoidcopy: /* Make the old page be freed below */ new_page = old_page; } - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); page_cache_release(new_page); page_cache_release(old_page); /* Caller expects lock to be held */ - spin_lock(&mm->page_table_lock); + spin_lock(ptl); return 0; } @@ -2778,6 +2786,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page; struct address_space *mapping; pte_t new_pte; + spinlock_t *ptl; /* * Currently, we are forced to kill the process in the event the @@ -2864,7 +2873,8 @@ retry: goto backout_unlocked; } - spin_lock(&mm->page_table_lock); + ptl = huge_pte_lockptr(h, mm, ptep); + spin_lock(ptl); size = i_size_read(mapping->host) >> huge_page_shift(h); if (idx >= size) goto backout; @@ -2885,16 +2895,16 @@ retry: if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { /* Optimization, do the COW without a second fault */ - ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page); + ret = 
hugetlb_cow(mm, vma, address, ptep, new_pte, page, ptl); } - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); unlock_page(page); out: return ret; backout: - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); backout_unlocked: unlock_page(page); put_page(page); @@ -2906,6 +2916,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, { pte_t *ptep; pte_t entry; + spinlock_t *ptl; int ret; struct page *page = NULL; struct page *pagecache_page = NULL; @@ -2918,7 +2929,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (ptep) { entry = huge_ptep_get(ptep); if (unlikely(is_hugetlb_entry_migration(entry))) { - migration_entry_wait_huge(mm, ptep); + migration_entry_wait_huge(vma, mm, ptep); return 0; } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) return VM_FAULT_HWPOISON_LARGE | @@ -2974,17 +2985,18 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (page != pagecache_page) lock_page(page); - spin_lock(&mm->page_table_lock); + ptl = huge_pte_lockptr(h, mm, ptep); + spin_lock(ptl); /* Check for a racing update before calling hugetlb_cow */ if (unlikely(!pte_same(entry, huge_ptep_get(ptep)))) - goto out_page_table_lock; + goto out_ptl; if (flags & FAULT_FLAG_WRITE) { if (!huge_pte_write(entry)) { ret = hugetlb_cow(mm, vma, address, ptep, entry, - pagecache_page); - goto out_page_table_lock; + pagecache_page, ptl); + goto out_ptl; } entry = huge_pte_mkdirty(entry); } @@ -2993,8 +3005,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, flags & FAULT_FLAG_WRITE)) update_mmu_cache(vma, address, ptep); -out_page_table_lock: - spin_unlock(&mm->page_table_lock); +out_ptl: + spin_unlock(ptl); if (pagecache_page) { unlock_page(pagecache_page); @@ -3020,9 +3032,9 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long remainder = *nr_pages; struct hstate *h = hstate_vma(vma); - spin_lock(&mm->page_table_lock); while (vaddr < vma->vm_end && 
remainder) { pte_t *pte; + spinlock_t *ptl = NULL; int absent; struct page *page; @@ -3030,8 +3042,12 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, * Some archs (sparc64, sh*) have multiple pte_ts to * each hugepage. We have to make sure we get the * first, for the page indexing below to work. + * + * Note that page table lock is not held when pte is null. */ pte = huge_pte_offset(mm, vaddr & huge_page_mask(h)); + if (pte) + ptl = huge_pte_lock(h, mm, pte); absent = !pte || huge_pte_none(huge_ptep_get(pte)); /* @@ -3043,6 +3059,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, */ if (absent && (flags & FOLL_DUMP) && !hugetlbfs_pagecache_present(h, vma, vaddr)) { + if (pte) + spin_unlock(ptl); remainder = 0; break; } @@ -3062,10 +3080,10 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, !huge_pte_write(huge_ptep_get(pte)))) { int ret; - spin_unlock(&mm->page_table_lock); + if (pte) + spin_unlock(ptl); ret = hugetlb_fault(mm, vma, vaddr, (flags & FOLL_WRITE) ? 
FAULT_FLAG_WRITE : 0); - spin_lock(&mm->page_table_lock); if (!(ret & VM_FAULT_ERROR)) continue; @@ -3096,8 +3114,8 @@ same_page: */ goto same_page; } + spin_unlock(ptl); } - spin_unlock(&mm->page_table_lock); *nr_pages = remainder; *position = vaddr; @@ -3118,13 +3136,15 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, flush_cache_range(vma, address, end); mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex); - spin_lock(&mm->page_table_lock); for (; address < end; address += huge_page_size(h)) { + spinlock_t *ptl; ptep = huge_pte_offset(mm, address); if (!ptep) continue; + ptl = huge_pte_lock(h, mm, ptep); if (huge_pmd_unshare(mm, &address, ptep)) { pages++; + spin_unlock(ptl); continue; } if (!huge_pte_none(huge_ptep_get(ptep))) { @@ -3134,8 +3154,8 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, set_huge_pte_at(mm, address, ptep, pte); pages++; } + spin_unlock(ptl); } - spin_unlock(&mm->page_table_lock); /* * Must flush TLB before releasing i_mmap_mutex: x86's huge_pmd_unshare * may have cleared our pud entry and done put_page on the page table: @@ -3298,6 +3318,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) unsigned long saddr; pte_t *spte = NULL; pte_t *pte; + spinlock_t *ptl; if (!vma_shareable(vma, addr)) return (pte_t *)pmd_alloc(mm, pud, addr); @@ -3320,13 +3341,14 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) if (!spte) goto out; - spin_lock(&mm->page_table_lock); + ptl = huge_pte_lockptr(hstate_vma(vma), mm, spte); + spin_lock(ptl); if (pud_none(*pud)) pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK)); else put_page(virt_to_page(spte)); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); out: pte = (pte_t *)pmd_alloc(mm, pud, addr); mutex_unlock(&mapping->i_mmap_mutex); @@ -3340,7 +3362,7 @@ out: * indicated by page_count > 1, unmap is achieved by clearing pud and * decrementing the ref count. 
If count == 1, the pte page is not shared. * - * called with vma->vm_mm->page_table_lock held. + * called with page table lock held. * * returns: 1 successfully unmapped a shared pte page * 0 the underlying pte page is not shared, or it is the last user diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 4cc19f6..c4403cd 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -525,8 +525,9 @@ static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma, #ifdef CONFIG_HUGETLB_PAGE int nid; struct page *page; + spinlock_t *ptl; - spin_lock(&vma->vm_mm->page_table_lock); + ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, (pte_t *)pmd); page = pte_page(huge_ptep_get((pte_t *)pmd)); nid = page_to_nid(page); if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT)) @@ -536,7 +537,7 @@ static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma, (flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) isolate_huge_page(page, private); unlock: - spin_unlock(&vma->vm_mm->page_table_lock); + spin_unlock(ptl); #else BUG(); #endif diff --git a/mm/migrate.c b/mm/migrate.c index dfc8300..01f45ce 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -130,7 +130,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, ptep = huge_pte_offset(mm, addr); if (!ptep) goto out; - ptl = &mm->page_table_lock; + ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep); } else { pmd = mm_find_pmd(mm, addr); if (!pmd) @@ -249,9 +249,10 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, __migration_entry_wait(mm, ptep, ptl); } -void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte) +void migration_entry_wait_huge(struct vm_area_struct *vma, + struct mm_struct *mm, pte_t *pte) { - spinlock_t *ptl = &(mm)->page_table_lock; + spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte); __migration_entry_wait(mm, pte, ptl); } diff --git a/mm/rmap.c b/mm/rmap.c index b59d741..55c8b8d 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -601,7 +601,7 @@ pte_t 
*__page_check_address(struct page *page, struct mm_struct *mm, if (unlikely(PageHuge(page))) { pte = huge_pte_offset(mm, address); - ptl = &mm->page_table_lock; + ptl = huge_pte_lockptr(page_hstate(page), mm, pte); goto check; } -- cgit v0.10.2 From c4088ebdca64c9a2e34a38177d2249805ede1f4b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:04 -0800 Subject: mm: convert the rest to new page table lock api Only trivial cases left. Let's convert them altogether. Signed-off-by: Naoya Horiguchi Signed-off-by: Kirill A. Shutemov Tested-by: Alex Thorlton Cc: Ingo Molnar Cc: "Eric W . Biederman" Cc: "Paul E . McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c2082ab..bccd5a6 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -710,6 +710,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, struct page *page) { pgtable_t pgtable; + spinlock_t *ptl; VM_BUG_ON(!PageCompound(page)); pgtable = pte_alloc_one(mm, haddr); @@ -724,9 +725,9 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, */ __SetPageUptodate(page); - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_none(*pmd))) { - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); mem_cgroup_uncharge_page(page); put_page(page); pte_free(mm, pgtable); @@ -739,7 +740,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, set_pmd_at(mm, haddr, pmd, entry); add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); atomic_long_inc(&mm->nr_ptes); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); } return 0; @@ -759,6 +760,7 @@ 
static inline struct page *alloc_hugepage_vma(int defrag, HPAGE_PMD_ORDER, vma, haddr, nd); } +/* Caller must hold page table lock. */ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd, struct page *zero_page) @@ -790,6 +792,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, return VM_FAULT_OOM; if (!(flags & FAULT_FLAG_WRITE) && transparent_hugepage_use_zero_page()) { + spinlock_t *ptl; pgtable_t pgtable; struct page *zero_page; bool set; @@ -802,10 +805,10 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; } - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd, zero_page); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); if (!set) { pte_free(mm, pgtable); put_huge_zero_page(); @@ -838,6 +841,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, struct vm_area_struct *vma) { + spinlock_t *dst_ptl, *src_ptl; struct page *src_page; pmd_t pmd; pgtable_t pgtable; @@ -848,8 +852,9 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (unlikely(!pgtable)) goto out; - spin_lock(&dst_mm->page_table_lock); - spin_lock_nested(&src_mm->page_table_lock, SINGLE_DEPTH_NESTING); + dst_ptl = pmd_lock(dst_mm, dst_pmd); + src_ptl = pmd_lockptr(src_mm, src_pmd); + spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); ret = -EAGAIN; pmd = *src_pmd; @@ -858,7 +863,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, goto out_unlock; } /* - * mm->page_table_lock is enough to be sure that huge zero pmd is not + * When page table lock is held, the huge zero pmd should not be * under splitting since we don't split the page itself, only pmd to * a page table. 
*/ @@ -879,8 +884,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, } if (unlikely(pmd_trans_splitting(pmd))) { /* split huge page running from under us */ - spin_unlock(&src_mm->page_table_lock); - spin_unlock(&dst_mm->page_table_lock); + spin_unlock(src_ptl); + spin_unlock(dst_ptl); pte_free(dst_mm, pgtable); wait_split_huge_page(vma->anon_vma, src_pmd); /* src_vma */ @@ -900,8 +905,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, ret = 0; out_unlock: - spin_unlock(&src_mm->page_table_lock); - spin_unlock(&dst_mm->page_table_lock); + spin_unlock(src_ptl); + spin_unlock(dst_ptl); out: return ret; } @@ -912,10 +917,11 @@ void huge_pmd_set_accessed(struct mm_struct *mm, pmd_t *pmd, pmd_t orig_pmd, int dirty) { + spinlock_t *ptl; pmd_t entry; unsigned long haddr; - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_same(*pmd, orig_pmd))) goto unlock; @@ -925,13 +931,14 @@ void huge_pmd_set_accessed(struct mm_struct *mm, update_mmu_cache_pmd(vma, address, pmd); unlock: - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); } static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, pmd_t orig_pmd, unsigned long haddr) { + spinlock_t *ptl; pgtable_t pgtable; pmd_t _pmd; struct page *page; @@ -958,7 +965,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm, mmun_end = haddr + HPAGE_PMD_SIZE; mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_same(*pmd, orig_pmd))) goto out_free_page; @@ -985,7 +992,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm, } smp_wmb(); /* make pte visible before pmd */ pmd_populate(mm, pmd, pgtable); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); put_huge_zero_page(); inc_mm_counter(mm, MM_ANONPAGES); @@ -995,7 +1002,7 @@ static int 
do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm, out: return ret; out_free_page: - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); mem_cgroup_uncharge_page(page); put_page(page); @@ -1009,6 +1016,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, struct page *page, unsigned long haddr) { + spinlock_t *ptl; pgtable_t pgtable; pmd_t _pmd; int ret = 0, i; @@ -1055,7 +1063,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, mmun_end = haddr + HPAGE_PMD_SIZE; mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_same(*pmd, orig_pmd))) goto out_free_pages; VM_BUG_ON(!PageHead(page)); @@ -1081,7 +1089,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, smp_wmb(); /* make pte visible before pmd */ pmd_populate(mm, pmd, pgtable); page_remove_rmap(page); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); @@ -1092,7 +1100,7 @@ out: return ret; out_free_pages: - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); mem_cgroup_uncharge_start(); for (i = 0; i < HPAGE_PMD_NR; i++) { @@ -1107,17 +1115,19 @@ out_free_pages: int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, pmd_t orig_pmd) { + spinlock_t *ptl; int ret = 0; struct page *page = NULL, *new_page; unsigned long haddr; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ + ptl = pmd_lockptr(mm, pmd); VM_BUG_ON(!vma->anon_vma); haddr = address & HPAGE_PMD_MASK; if (is_huge_zero_pmd(orig_pmd)) goto alloc; - spin_lock(&mm->page_table_lock); + spin_lock(ptl); if (unlikely(!pmd_same(*pmd, orig_pmd))) goto out_unlock; @@ -1133,7 +1143,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct 
vm_area_struct *vma, goto out_unlock; } get_page(page); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); alloc: if (transparent_hugepage_enabled(vma) && !transparent_hugepage_debug_cow()) @@ -1180,11 +1190,11 @@ alloc: mmun_end = haddr + HPAGE_PMD_SIZE; mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); - spin_lock(&mm->page_table_lock); + spin_lock(ptl); if (page) put_page(page); if (unlikely(!pmd_same(*pmd, orig_pmd))) { - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); mem_cgroup_uncharge_page(new_page); put_page(new_page); goto out_mn; @@ -1206,13 +1216,13 @@ alloc: } ret |= VM_FAULT_WRITE; } - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); out_mn: mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); out: return ret; out_unlock: - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); return ret; } @@ -1224,7 +1234,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; struct page *page = NULL; - assert_spin_locked(&mm->page_table_lock); + assert_spin_locked(pmd_lockptr(mm, pmd)); if (flags & FOLL_WRITE && !pmd_write(*pmd)) goto out; @@ -1271,6 +1281,7 @@ out: int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pmd_t pmd, pmd_t *pmdp) { + spinlock_t *ptl; struct anon_vma *anon_vma = NULL; struct page *page; unsigned long haddr = addr & HPAGE_PMD_MASK; @@ -1280,7 +1291,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, bool migrated = false; int flags = 0; - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmdp); if (unlikely(!pmd_same(pmd, *pmdp))) goto out_unlock; @@ -1318,7 +1329,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, * relock and check_same as the page may no longer be mapped. * As the fault is being retried, do not account for it. 
*/ - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); wait_on_page_locked(page); page_nid = -1; goto out; @@ -1326,13 +1337,13 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Page is misplaced, serialise migrations and parallel THP splits */ get_page(page); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); if (!page_locked) lock_page(page); anon_vma = page_lock_anon_vma_read(page); /* Confirm the PMD did not change while page_table_lock was released */ - spin_lock(&mm->page_table_lock); + spin_lock(ptl); if (unlikely(!pmd_same(pmd, *pmdp))) { unlock_page(page); put_page(page); @@ -1344,7 +1355,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, * Migrate the THP to the requested node, returns with page unlocked * and pmd_numa cleared. */ - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); migrated = migrate_misplaced_transhuge_page(mm, vma, pmdp, pmd, addr, page, target_nid); if (migrated) { @@ -1361,7 +1372,7 @@ clear_pmdnuma: update_mmu_cache_pmd(vma, addr, pmdp); unlock_page(page); out_unlock: - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); out: if (anon_vma) @@ -2371,7 +2382,7 @@ static void collapse_huge_page(struct mm_struct *mm, pte_t *pte; pgtable_t pgtable; struct page *new_page; - spinlock_t *ptl; + spinlock_t *pmd_ptl, *pte_ptl; int isolated; unsigned long hstart, hend; unsigned long mmun_start; /* For mmu_notifiers */ @@ -2414,12 +2425,12 @@ static void collapse_huge_page(struct mm_struct *mm, anon_vma_lock_write(vma->anon_vma); pte = pte_offset_map(pmd, address); - ptl = pte_lockptr(mm, pmd); + pte_ptl = pte_lockptr(mm, pmd); mmun_start = address; mmun_end = address + HPAGE_PMD_SIZE; mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); - spin_lock(&mm->page_table_lock); /* probably unnecessary */ + pmd_ptl = pmd_lock(mm, pmd); /* probably unnecessary */ /* * After this gup_fast can't run anymore. 
This also removes * any huge TLB entry from the CPU so we won't allow @@ -2427,16 +2438,16 @@ static void collapse_huge_page(struct mm_struct *mm, * to avoid the risk of CPU bugs in that area. */ _pmd = pmdp_clear_flush(vma, address, pmd); - spin_unlock(&mm->page_table_lock); + spin_unlock(pmd_ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); - spin_lock(ptl); + spin_lock(pte_ptl); isolated = __collapse_huge_page_isolate(vma, address, pte); - spin_unlock(ptl); + spin_unlock(pte_ptl); if (unlikely(!isolated)) { pte_unmap(pte); - spin_lock(&mm->page_table_lock); + spin_lock(pmd_ptl); BUG_ON(!pmd_none(*pmd)); /* * We can only use set_pmd_at when establishing @@ -2444,7 +2455,7 @@ static void collapse_huge_page(struct mm_struct *mm, * points to regular pagetables. Use pmd_populate for that */ pmd_populate(mm, pmd, pmd_pgtable(_pmd)); - spin_unlock(&mm->page_table_lock); + spin_unlock(pmd_ptl); anon_vma_unlock_write(vma->anon_vma); goto out; } @@ -2455,7 +2466,7 @@ static void collapse_huge_page(struct mm_struct *mm, */ anon_vma_unlock_write(vma->anon_vma); - __collapse_huge_page_copy(pte, new_page, vma, address, ptl); + __collapse_huge_page_copy(pte, new_page, vma, address, pte_ptl); pte_unmap(pte); __SetPageUptodate(new_page); pgtable = pmd_pgtable(_pmd); @@ -2470,13 +2481,13 @@ static void collapse_huge_page(struct mm_struct *mm, */ smp_wmb(); - spin_lock(&mm->page_table_lock); + spin_lock(pmd_ptl); BUG_ON(!pmd_none(*pmd)); page_add_new_anon_rmap(new_page, vma, address); pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, address, pmd, _pmd); update_mmu_cache_pmd(vma, address, pmd); - spin_unlock(&mm->page_table_lock); + spin_unlock(pmd_ptl); *hpage = NULL; @@ -2805,6 +2816,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address, pmd_t *pmd) { + spinlock_t *ptl; struct page *page; struct mm_struct *mm = vma->vm_mm; unsigned long haddr = address & 
HPAGE_PMD_MASK; @@ -2817,22 +2829,22 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address, mmun_end = haddr + HPAGE_PMD_SIZE; again: mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_trans_huge(*pmd))) { - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); return; } if (is_huge_zero_pmd(*pmd)) { __split_huge_zero_page_pmd(vma, haddr, pmd); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); return; } page = pmd_page(*pmd); VM_BUG_ON(!page_count(page)); get_page(page); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); split_huge_page(page); diff --git a/mm/memory.c b/mm/memory.c index 0b5a93a..d05c6b1 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -550,6 +550,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, pmd_t *pmd, unsigned long address) { + spinlock_t *ptl; pgtable_t new = pte_alloc_one(mm, address); int wait_split_huge_page; if (!new) @@ -570,7 +571,7 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, */ smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); wait_split_huge_page = 0; if (likely(pmd_none(*pmd))) { /* Has another populated it ? 
*/ atomic_long_inc(&mm->nr_ptes); @@ -578,7 +579,7 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, new = NULL; } else if (unlikely(pmd_trans_splitting(*pmd))) wait_split_huge_page = 1; - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); if (new) pte_free(mm, new); if (wait_split_huge_page) @@ -1516,20 +1517,20 @@ struct page *follow_page_mask(struct vm_area_struct *vma, split_huge_page_pmd(vma, address, pmd); goto split_fallthrough; } - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); if (likely(pmd_trans_huge(*pmd))) { if (unlikely(pmd_trans_splitting(*pmd))) { - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); wait_split_huge_page(vma->anon_vma, pmd); } else { page = follow_trans_huge_pmd(vma, address, pmd, flags); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); *page_mask = HPAGE_PMD_NR - 1; goto out; } } else - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); /* fall through */ } split_fallthrough: diff --git a/mm/migrate.c b/mm/migrate.c index 01f45ce..316e720 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1667,6 +1667,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, unsigned long address, struct page *page, int node) { + spinlock_t *ptl; unsigned long haddr = address & HPAGE_PMD_MASK; pg_data_t *pgdat = NODE_DATA(node); int isolated = 0; @@ -1706,9 +1707,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, WARN_ON(PageLRU(new_page)); /* Recheck the target PMD */ - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_same(*pmd, entry))) { - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); /* Reverse changes made by migrate_page_copy() */ if (TestClearPageActive(new_page)) @@ -1753,7 +1754,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, * before it's fully transferred to the new page. 
*/ mem_cgroup_end_migration(memcg, page, new_page, true); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); unlock_page(new_page); unlock_page(page); diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 41fee3e..cbb3854 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -151,7 +151,7 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) { - assert_spin_locked(&mm->page_table_lock); + assert_spin_locked(pmd_lockptr(mm, pmdp)); /* FIFO */ if (!pmd_huge_pte(mm, pmdp)) @@ -170,7 +170,7 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { pgtable_t pgtable; - assert_spin_locked(&mm->page_table_lock); + assert_spin_locked(pmd_lockptr(mm, pmdp)); /* FIFO */ pgtable = pmd_huge_pte(mm, pmdp); -- cgit v0.10.2 From e009bb30c8df8a52a9622b616b67436b6a03a0cd Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:07 -0800 Subject: mm: implement split page table lock for PMD level The basic idea is the same as with PTE level: the lock is embedded into struct page of table's page. We can't use mm->pmd_huge_pte to store pgtables for THP, since we don't take mm->page_table_lock anymore. Let's reuse page->lru of table's page for that. pgtable_pmd_page_ctor() returns true, if initialization is successful and false otherwise. Current implementation never fails, but assumption that constructor can fail will help to port it to -rt where spinlock_t is rather huge and cannot be embedded into struct page -- dynamic allocation is required. Signed-off-by: Naoya Horiguchi Signed-off-by: Kirill A. Shutemov Tested-by: Alex Thorlton Cc: Ingo Molnar Cc: "Eric W . Biederman" Cc: "Paul E . 
McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Reviewed-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mm.h b/include/linux/mm.h index 861cad5..255750d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1378,13 +1378,45 @@ static inline void pgtable_page_dtor(struct page *page) ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \ NULL: pte_offset_kernel(pmd, address)) +#if USE_SPLIT_PMD_PTLOCKS + +static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) +{ + return &virt_to_page(pmd)->ptl; +} + +static inline bool pgtable_pmd_page_ctor(struct page *page) +{ + spin_lock_init(&page->ptl); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + page->pmd_huge_pte = NULL; +#endif + return true; +} + +static inline void pgtable_pmd_page_dtor(struct page *page) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + VM_BUG_ON(page->pmd_huge_pte); +#endif +} + +#define pmd_huge_pte(mm, pmd) (virt_to_page(pmd)->pmd_huge_pte) + +#else + static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) { return &mm->page_table_lock; } +static inline bool pgtable_pmd_page_ctor(struct page *page) { return true; } +static inline void pgtable_pmd_page_dtor(struct page *page) {} + #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) +#endif + static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) { spinlock_t *ptl = pmd_lockptr(mm, pmd); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 566df57..9342610 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -24,6 +24,8 @@ struct address_space; #define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) +#define 
USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \ + IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) /* * Each physical page in the system has a struct page associated with @@ -63,6 +65,9 @@ struct page { * this page is only used to * free other pages. */ +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS + pgtable_t pmd_huge_pte; /* protected by page->ptl */ +#endif }; union { @@ -406,7 +411,7 @@ struct mm_struct { #ifdef CONFIG_MMU_NOTIFIER struct mmu_notifier_mm *mmu_notifier_mm; #endif -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS pgtable_t pmd_huge_pte; /* protected by page_table_lock */ #endif #ifdef CONFIG_CPUMASK_OFFSTACK diff --git a/kernel/fork.c b/kernel/fork.c index e252075..728d5be 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -560,7 +560,7 @@ static void check_mm(struct mm_struct *mm) "mm:%p idx:%d val:%ld\n", mm, i, x); } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS VM_BUG_ON(mm->pmd_huge_pte); #endif } @@ -814,7 +814,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk) memcpy(mm, oldmm, sizeof(*mm)); mm_init_cpumask(mm); -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS mm->pmd_huge_pte = NULL; #endif if (!mm_init(mm, tsk)) diff --git a/mm/Kconfig b/mm/Kconfig index c28d247..7aa02de 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -222,6 +222,9 @@ config SPLIT_PTLOCK_CPUS default "999999" if !64BIT && GENERIC_LOCKBREAK default "4" +config ARCH_ENABLE_SPLIT_PMD_PTLOCK + boolean + # # support for memory balloon compaction config BALLOON_COMPACTION -- cgit v0.10.2 From 9491846fca57e9326b6673716c386b76fc13ebca Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:10 -0800 Subject: x86, mm: enable split page table lock for PMD level Enable PMD split page table lock for X86_64 and PAE. 
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Kirill A. Shutemov Tested-by: Alex Thorlton Cc: Ingo Molnar Cc: Naoya Horiguchi Cc: "Eric W . Biederman" Cc: "Paul E . McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Reviewed-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6e3e1cb..af5513e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1885,6 +1885,10 @@ config USE_PERCPU_NUMA_NODE_ID def_bool y depends on NUMA +config ARCH_ENABLE_SPLIT_PMD_PTLOCK + def_bool y + depends on X86_64 || X86_PAE + menu "Power management and ACPI options" config ARCH_HIBERNATION_HEADER diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index b4389a4..c4412e9 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -80,12 +80,21 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, #if PAGETABLE_LEVELS > 2 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); + struct page *page; + page = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0); + if (!page) + return NULL; + if (!pgtable_pmd_page_ctor(page)) { + __free_pages(page, 0); + return NULL; + } + return (pmd_t *)page_address(page); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); + pgtable_pmd_page_dtor(virt_to_page(pmd)); free_page((unsigned long)pmd); } -- cgit v0.10.2 From 09ef4939850aa81e3822b5dfb9ba2ada5e565816 Mon Sep 17 00:00:00 2001 From: "Kirill A. 
Shutemov" Date: Thu, 14 Nov 2013 14:31:13 -0800 Subject: x86: add missed pgtable_pmd_page_ctor/dtor calls for preallocated pmds In the split page table lock case, we embed spinlock_t into struct page. For obvious reasons, we don't want to increase the size of struct page if spinlock_t is too big, like with DEBUG_SPINLOCK or DEBUG_LOCK_ALLOC or on an -rt kernel. So we disable split page table lock if spinlock_t is too big. This patchset allows the lock to be allocated dynamically if spinlock_t is big. In this case page->ptl is used to store a pointer to the spinlock instead of the spinlock itself. It costs an additional cache line for the indirect access, but fixes page fault scalability for multi-threaded applications. LOCK_STAT depends on DEBUG_SPINLOCK, so on the current kernel enabling LOCK_STAT to analyse scalability issues breaks scalability. ;) The patchset mostly fixes this. Results for ./thp_memscale -c 80 -b 512M on 4-socket machine: baseline, no CONFIG_LOCK_STAT: 9.115460703 seconds time elapsed baseline, CONFIG_LOCK_STAT=y: 53.890567123 seconds time elapsed patched, no CONFIG_LOCK_STAT: 8.852250368 seconds time elapsed patched, CONFIG_LOCK_STAT=y: 11.069770759 seconds time elapsed Patch count is scary, but most of them are trivial. Overview: Patches 1-4 Few bug fixes. No dependencies on other patches. Probably should be applied as soon as possible. Patch 5 Changes signature of pgtable_page_ctor(). We will use it for dynamic lock allocation, so it can fail. Patches 6-8 Add missing constructor/destructor calls on a few archs. It fixes NR_PAGETABLE accounting and prepares for using split ptl. Patches 9-33 Add pgtable_page_ctor() fail handling to all archs. Patch 34 Finally adds support for dynamically-allocated page->ptl. Also contains documentation for split page table lock. This patch (of 34): I've missed that we preallocate a few pmds in pgd_alloc() if X86_PAE is enabled. Let's add the missed constructor/destructor calls.
I haven't noticed it during testing since prep_new_page() clears page->mapping and therefore page->ptl. It's effectively equal to spin_lock_init(&page->ptl). Signed-off-by: Kirill A. Shutemov Acked-by: Ingo Molnar Cc: "H. Peter Anvin" Cc: "James E.J. Bottomley" Cc: "Kirill A. Shutemov" Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Chen Liqin Cc: Chris Metcalf Cc: Chris Zankel Cc: Christoph Lameter Cc: David Howells Cc: David S. Miller Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: Grant Likely Cc: Guan Xuetao Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Heiko Carstens Cc: Helge Deller Cc: Hirokazu Takata Cc: Ivan Kokshaysky Cc: James Hogan Cc: Jeff Dike Cc: Jesper Nilsson Cc: Jonas Bonn Cc: Koichi Yasutake Cc: Lennox Wu Cc: Martin Schwidefsky Cc: Matt Turner Cc: Max Filippov Cc: Michal Simek Cc: Mikael Starvik Cc: Paul Mackerras Cc: Paul Mundt Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Richard Henderson Cc: Richard Kuo Cc: Richard Weinberger Cc: Rob Herring Cc: Russell King Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index dfa537a..1a7d213 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -189,8 +189,10 @@ static void free_pmds(pmd_t *pmds[]) int i; for(i = 0; i < PREALLOCATED_PMDS; i++) - if (pmds[i]) + if (pmds[i]) { + pgtable_pmd_page_dtor(virt_to_page(pmds[i])); free_page((unsigned long)pmds[i]); + } } static int preallocate_pmds(pmd_t *pmds[]) @@ -200,8 +202,13 @@ static int preallocate_pmds(pmd_t *pmds[]) for(i = 0; i < PREALLOCATED_PMDS; i++) { pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP); - if (pmd == NULL) + if (!pmd) + failed = true; + if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) { + free_page((unsigned long)pmds[i]); + pmd = NULL; failed = true; + } pmds[i] = pmd; } -- cgit v0.10.2 From 26db39027c00744107acf184527010a1e05907e7 Mon Sep 17 00:00:00 2001 From: "Kirill A. 
Shutemov" Date: Thu, 14 Nov 2013 14:31:17 -0800 Subject: cris: fix potential NULL-pointer dereference Add missing check for memory allocation fail. Signed-off-by: Kirill A. Shutemov Cc: Mikael Starvik Acked-by: Jesper Nilsson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/cris/include/asm/pgalloc.h b/arch/cris/include/asm/pgalloc.h index 6da975d..d9504d3 100644 --- a/arch/cris/include/asm/pgalloc.h +++ b/arch/cris/include/asm/pgalloc.h @@ -32,6 +32,8 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addres { struct page *pte; pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); + if (!pte) + return NULL; pgtable_page_ctor(pte); return pte; } -- cgit v0.10.2 From fecf3743b824ce4eb275ed4a1d6aee9494f6a966 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:18 -0800 Subject: m32r: fix potential NULL-pointer dereference Add missing check for memory allocation fail. Signed-off-by: Kirill A. Shutemov Cc: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/m32r/include/asm/pgalloc.h b/arch/m32r/include/asm/pgalloc.h index 0fc7361..ac4208b 100644 --- a/arch/m32r/include/asm/pgalloc.h +++ b/arch/m32r/include/asm/pgalloc.h @@ -43,6 +43,8 @@ static __inline__ pgtable_t pte_alloc_one(struct mm_struct *mm, { struct page *pte = alloc_page(GFP_KERNEL|__GFP_ZERO); + if (!pte) + return NULL; pgtable_page_ctor(pte); return pte; } -- cgit v0.10.2 From f8c6d30b766fc8eb83f5b7983ff8a5a9b3189365 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:19 -0800 Subject: xtensa: fix potential NULL-pointer dereference Add missing check for memory allocation fail. Signed-off-by: Kirill A. 
Shutemov Cc: Chris Zankel Cc: Max Filippov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h index cf914c8..037671a 100644 --- a/arch/xtensa/include/asm/pgalloc.h +++ b/arch/xtensa/include/asm/pgalloc.h @@ -51,9 +51,13 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr) { + pte_t *pte; struct page *page; - page = virt_to_page(pte_alloc_one_kernel(mm, addr)); + pte = pte_alloc_one_kernel(mm, addr); + if (!pte) + return NULL; + page = virt_to_page(pte); pgtable_page_ctor(page); return page; } -- cgit v0.10.2 From 390f44e2aa2ab83f08231d7d05f066dc3494490e Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:20 -0800 Subject: mm: allow pgtable_page_ctor() to fail Change pgtable_page_ctor() return type from void to bool. Returns true, if initialization is successful and false otherwise. Current implementation never fails, but it will change later. Signed-off-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mm.h b/include/linux/mm.h index 255750d..e855351 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1338,10 +1338,11 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a #define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) #endif /* USE_SPLIT_PTE_PTLOCKS */ -static inline void pgtable_page_ctor(struct page *page) +static inline bool pgtable_page_ctor(struct page *page) { pte_lock_init(page); inc_zone_page_state(page, NR_PAGETABLE); + return true; } static inline void pgtable_page_dtor(struct page *page) -- cgit v0.10.2 From 8abe73465660f12dee03871f681175f4dae62e7f Mon Sep 17 00:00:00 2001 From: "Kirill A. 
Shutemov" Date: Thu, 14 Nov 2013 14:31:21 -0800 Subject: microblaze: add missing pgtable_page_ctor/dtor calls It will fix NR_PAGETABLE accounting. It's also required if the arch is ever going to support split ptl. Signed-off-by: Kirill A. Shutemov Cc: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h index ebd3579..7fdf7fa 100644 --- a/arch/microblaze/include/asm/pgalloc.h +++ b/arch/microblaze/include/asm/pgalloc.h @@ -122,8 +122,13 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, #endif ptepage = alloc_pages(flags, 0); - if (ptepage) - clear_highpage(ptepage); + if (!ptepage) + return NULL; + clear_highpage(ptepage); + if (!pgtable_page_ctor(ptepage)) { + __free_page(ptepage); + return NULL; + } return ptepage; } @@ -158,8 +163,9 @@ extern inline void pte_free_slow(struct page *ptepage) __free_page(ptepage); } -extern inline void pte_free(struct mm_struct *mm, struct page *ptepage) +static inline void pte_free(struct mm_struct *mm, struct page *ptepage) { + pgtable_page_dtor(ptepage); __free_page(ptepage); } -- cgit v0.10.2 From 0470d4aa29eb0a49554fe71e986e6d6c38dd85cb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:22 -0800 Subject: mn10300: add missing pgtable_page_ctor/dtor calls It will fix NR_PAGETABLE accounting. It's also required if the arch is ever going to support split ptl. Signed-off-by: Kirill A.
Shutemov Acked-by: David Howells Cc: Koichi Yasutake Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/mn10300/include/asm/pgalloc.h b/arch/mn10300/include/asm/pgalloc.h index 146bacf..0f25d5f 100644 --- a/arch/mn10300/include/asm/pgalloc.h +++ b/arch/mn10300/include/asm/pgalloc.h @@ -46,6 +46,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) static inline void pte_free(struct mm_struct *mm, struct page *pte) { + pgtable_page_dtor(pte); __free_page(pte); } diff --git a/arch/mn10300/mm/pgtable.c b/arch/mn10300/mm/pgtable.c index bd9ada6..e77a7c7 100644 --- a/arch/mn10300/mm/pgtable.c +++ b/arch/mn10300/mm/pgtable.c @@ -78,8 +78,13 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) #else pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); #endif - if (pte) - clear_highpage(pte); + if (!pte) + return NULL; + clear_highpage(pte); + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } return pte; } -- cgit v0.10.2 From b3d59c6eb49476110b4a76bfbae63daae1cda268 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:23 -0800 Subject: openrisc: add missing pgtable_page_ctor/dtor calls It will fix NR_PAGETABLE accounting. It's also required if the arch is ever going to support split ptl. Signed-off-by: Kirill A.
Shutemov Cc: Jonas Bonn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h index 05c39ec..21484e5b 100644 --- a/arch/openrisc/include/asm/pgalloc.h +++ b/arch/openrisc/include/asm/pgalloc.h @@ -78,8 +78,13 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, { struct page *pte; pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); - if (pte) - clear_page(page_address(pte)); + if (!pte) + return NULL; + clear_page(page_address(pte)); + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } return pte; } @@ -90,6 +95,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) static inline void pte_free(struct mm_struct *mm, struct page *pte) { + pgtable_page_dtor(pte); __free_page(pte); } -- cgit v0.10.2 From 3fd681b68cd34eacb106b25fcb10bb202a3232c5 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:24 -0800 Subject: alpha: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h index bc2a0da..aab14a0 100644 --- a/arch/alpha/include/asm/pgalloc.h +++ b/arch/alpha/include/asm/pgalloc.h @@ -72,7 +72,10 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address) if (!pte) return NULL; page = virt_to_page(pte); - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } return page; } -- cgit v0.10.2 From ca6ec3bbaaaf5941e970314a2eb3680b9e7e698a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:25 -0800 Subject: arc: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. 
Shutemov Acked-by: Vineet Gupta [for arch/arc bits] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 36a9f20..81208bfd 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -105,11 +105,16 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { pgtable_t pte_pg; + struct page *page; pte_pg = __get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte()); - if (pte_pg) { - memzero((void *)pte_pg, PTRS_PER_PTE * 4); - pgtable_page_ctor(virt_to_page(pte_pg)); + if (!pte_pg) + return 0; + memzero((void *)pte_pg, PTRS_PER_PTE * 4); + page = virt_to_page(pte_pg); + if (!pgtable_page_ctor(page)) { + __free_page(page); + return 0; } return pte_pg; -- cgit v0.10.2 From affce5089a82a045274def17b44b13d10ab7c1d4 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:26 -0800 Subject: arm: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index 943504f..78a7793 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -102,12 +102,14 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr) #else pte = alloc_pages(PGALLOC_GFP, 0); #endif - if (pte) { - if (!PageHighMem(pte)) - clean_pte_table(page_address(pte)); - pgtable_page_ctor(pte); + if (!pte) + return NULL; + if (!PageHighMem(pte)) + clean_pte_table(page_address(pte)); + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; } - return pte; } -- cgit v0.10.2 From d97a22913808b191c95fbfc51e6405c4504979e6 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:27 -0800 Subject: arm64: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. 
Shutemov Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index f214069..9bea6e7 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -63,9 +63,12 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr) struct page *pte; pte = alloc_pages(PGALLOC_GFP, 0); - if (pte) - pgtable_page_ctor(pte); - + if (!pte) + return NULL; + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } return pte; } -- cgit v0.10.2 From 2cb6182bb6153f685c37773687e63094317b1eeb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:28 -0800 Subject: avr32: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Haavard Skinnemoen Acked-by: Hans-Christian Egtvedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/avr32/include/asm/pgalloc.h b/arch/avr32/include/asm/pgalloc.h index bc7e8ae..1aba19d 100644 --- a/arch/avr32/include/asm/pgalloc.h +++ b/arch/avr32/include/asm/pgalloc.h @@ -68,7 +68,10 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, return NULL; page = virt_to_page(pg); - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + quicklist_free(QUICK_PT, NULL, pg); + return NULL; + } return page; } -- cgit v0.10.2 From 0da5303bdb55e67963e8b53ebf3a2aa75051cab4 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:29 -0800 Subject: cris: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. 
Shutemov Cc: Mikael Starvik Acked-by: Jesper Nilsson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/cris/include/asm/pgalloc.h b/arch/cris/include/asm/pgalloc.h index d9504d3..235ece4 100644 --- a/arch/cris/include/asm/pgalloc.h +++ b/arch/cris/include/asm/pgalloc.h @@ -34,7 +34,10 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addres pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); if (!pte) return NULL; - pgtable_page_ctor(pte); + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } return pte; } -- cgit v0.10.2 From 3b9cf77d1aad657dd44ab0f0368978e87b64ad23 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:30 -0800 Subject: frv: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c index f6084bc..41907d2 100644 --- a/arch/frv/mm/pgalloc.c +++ b/arch/frv/mm/pgalloc.c @@ -37,11 +37,15 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) #else page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); #endif - if (page) { - clear_highpage(page); - pgtable_page_ctor(page); - flush_dcache_page(page); + if (!page) + return NULL; + + clear_highpage(page); + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; } + flush_dcache_page(page); return page; } -- cgit v0.10.2 From 5de1423d8901bed12f1b96acef27ac133592eaeb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:31 -0800 Subject: hexagon: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. 
Shutemov Cc: Richard Kuo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h index 679bf6d..4c9d382 100644 --- a/arch/hexagon/include/asm/pgalloc.h +++ b/arch/hexagon/include/asm/pgalloc.h @@ -65,10 +65,12 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, struct page *pte; pte = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); - - if (pte) - pgtable_page_ctor(pte); - + if (!pte) + return NULL; + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } return pte; } -- cgit v0.10.2 From ca973d86d4c5b70c32e7b91ce08f3e8e061e2535 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:32 -0800 Subject: ia64: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Tony Luck Cc: Fenghua Yu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h index 96a8d92..5767cdf 100644 --- a/arch/ia64/include/asm/pgalloc.h +++ b/arch/ia64/include/asm/pgalloc.h @@ -91,7 +91,10 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr) if (!pg) return NULL; page = virt_to_page(pg); - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + quicklist_free(0, NULL, pg); + return NULL; + } return page; } -- cgit v0.10.2 From 7251ab6b86179f195b3f4b56d57ce9dc7a725409 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:33 -0800 Subject: m32r: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. 
Shutemov Cc: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/m32r/include/asm/pgalloc.h b/arch/m32r/include/asm/pgalloc.h index ac4208b..2d55a06 100644 --- a/arch/m32r/include/asm/pgalloc.h +++ b/arch/m32r/include/asm/pgalloc.h @@ -45,7 +45,10 @@ static __inline__ pgtable_t pte_alloc_one(struct mm_struct *mm, if (!pte) return NULL; - pgtable_page_ctor(pte); + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } return pte; } -- cgit v0.10.2 From f84c914b986ed2ec4ffaa5672b423b1f6b65519d Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:34 -0800 Subject: m68k: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h index 313f3dd..f9924fb 100644 --- a/arch/m68k/include/asm/mcf_pgalloc.h +++ b/arch/m68k/include/asm/mcf_pgalloc.h @@ -56,6 +56,10 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, if (!page) return NULL; + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } pte = kmap(page); if (pte) { diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h index 2f02f26..24bcba4 100644 --- a/arch/m68k/include/asm/motorola_pgalloc.h +++ b/arch/m68k/include/asm/motorola_pgalloc.h @@ -29,18 +29,22 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); + struct page *page; pte_t *pte; + page = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); if(!page) return NULL; + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } pte = kmap(page); __flush_page_to_ram(pte); flush_tlb_kernel_page(pte); nocache_page(pte); kunmap(page); - 
pgtable_page_ctor(page); return page; } diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index 48d80d5..f868506 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -59,7 +59,10 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, return NULL; clear_highpage(page); - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } return page; } -- cgit v0.10.2 From 855a30531806388cd63940cb03a18d6546404805 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:35 -0800 Subject: metag: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: James Hogan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/metag/include/asm/pgalloc.h b/arch/metag/include/asm/pgalloc.h index 275d928..3104df0 100644 --- a/arch/metag/include/asm/pgalloc.h +++ b/arch/metag/include/asm/pgalloc.h @@ -52,8 +52,12 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, { struct page *pte; pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0); - if (pte) - pgtable_page_ctor(pte); + if (!pte) + return NULL; + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } return pte; } -- cgit v0.10.2 From 3b5b51c1a7bd148e1d7721874849435d76728375 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:36 -0800 Subject: mips: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. 
Shutemov Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 881d18b4..b336037 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -80,9 +80,12 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, struct page *pte; pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); - if (pte) { - clear_highpage(pte); - pgtable_page_ctor(pte); + if (!pte) + return NULL; + clear_highpage(pte); + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; } return pte; } -- cgit v0.10.2 From bc16640dce9035177c99d8fb11d3b94abe9f36c8 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:37 -0800 Subject: parisc: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: "James E.J. Bottomley" Cc: Helge Deller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index fc987a1..f213f5b 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -121,8 +121,12 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); - if (page) - pgtable_page_ctor(page); + if (!page) + return NULL; + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } return page; } -- cgit v0.10.2 From 4f804943f99454ac79e0f448428447f1a72d09fc Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:38 -0800 Subject: powerpc: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. 
Shutemov Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h index f65e27b..16cb92d 100644 --- a/arch/powerpc/include/asm/pgalloc-64.h +++ b/arch/powerpc/include/asm/pgalloc-64.h @@ -91,7 +91,10 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, if (!pte) return NULL; page = virt_to_page(pte); - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } return page; } diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 6c856fb..5b96017 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -121,7 +121,10 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) ptepage = alloc_pages(flags, 0); if (!ptepage) return NULL; - pgtable_page_ctor(ptepage); + if (!pgtable_page_ctor(ptepage)) { + __free_page(ptepage); + return NULL; + } return ptepage; } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 536eec72..9d95786 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -378,6 +378,10 @@ static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel) __GFP_REPEAT | __GFP_ZERO); if (!page) return NULL; + if (!kernel && !pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } ret = page_address(page); spin_lock(&mm->page_table_lock); @@ -392,9 +396,6 @@ static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel) } spin_unlock(&mm->page_table_lock); - if (!kernel) - pgtable_page_ctor(page); - return (pte_t *)ret; } -- cgit v0.10.2 From e89cfa58a8358fdb4d4e79936c25222416ad415e Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:39 -0800 Subject: s390: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. 
Shutemov Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 1ea18fc..e794c88 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -772,7 +772,11 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, __free_page(page); return NULL; } - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + kfree(mp); + __free_page(page); + return NULL; + } mp->vmaddr = vmaddr & PMD_MASK; INIT_LIST_HEAD(&mp->mapper); page->index = (unsigned long) mp; @@ -902,7 +906,10 @@ unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) page = alloc_page(GFP_KERNEL|__GFP_REPEAT); if (!page) return NULL; - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } atomic_set(&page->_mapcount, 1); table = (unsigned long *) page_to_phys(page); clear_table(table, _PAGE_INVALID, PAGE_SIZE); -- cgit v0.10.2 From 96da3a62ea1fef2d9dfa8eff97706603918d5f4d Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:40 -0800 Subject: score: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Chen Liqin Acked-by: Lennox Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/score/include/asm/pgalloc.h b/arch/score/include/asm/pgalloc.h index 716b3fd..2e06765 100644 --- a/arch/score/include/asm/pgalloc.h +++ b/arch/score/include/asm/pgalloc.h @@ -54,9 +54,12 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, struct page *pte; pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); - if (pte) { - clear_highpage(pte); - pgtable_page_ctor(pte); + if (!pte) + return NULL; + clear_highpage(pte); + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; } return pte; } -- cgit v0.10.2 From 478cf8ca013f9e33be978dee4d0a15906e5a030a Mon Sep 17 00:00:00 2001 From: "Kirill A. 
Shutemov" Date: Thu, 14 Nov 2013 14:31:41 -0800 Subject: sh: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h index 8c00785..a33673b 100644 --- a/arch/sh/include/asm/pgalloc.h +++ b/arch/sh/include/asm/pgalloc.h @@ -47,7 +47,10 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, if (!pg) return NULL; page = virt_to_page(pg); - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + quicklist_free(QUICK_PT, NULL, pg); + return NULL; + } return page; } -- cgit v0.10.2 From 1ae9ae5f7df726c67736c643c1f03f7bdfe748eb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:42 -0800 Subject: sparc: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index ed82eda..d6de935 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2519,12 +2519,13 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, return pte; page = __alloc_for_cache(mm); - if (page) { - pgtable_page_ctor(page); - pte = (pte_t *) page_address(page); + if (!page) + return NULL; + if (!pgtable_page_ctor(page)) { + free_hot_cold_page(page, 0); + return NULL; } - - return pte; + return (pte_t *) page_address(page); } void pte_free_kernel(struct mm_struct *mm, pte_t *pte) diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index 5d721df..869023a 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -345,7 +345,10 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0) return NULL; page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT); - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } return 
page; } -- cgit v0.10.2 From 76b3aec332e74596c517e1198764a5aeb8ea8398 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:43 -0800 Subject: tile: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Acked-by: Chris Metcalf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 4fd9ec0..5e86eac 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -241,6 +241,11 @@ struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address, if (p == NULL) return NULL; + if (!pgtable_page_ctor(p)) { + __free_pages(p, L2_USER_PGTABLE_ORDER); + return NULL; + } + /* * Make every page have a page_count() of one, not just the first. * We don't use __GFP_COMP since it doesn't look like it works @@ -251,7 +256,6 @@ struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address, inc_zone_page_state(p+i, NR_PAGETABLE); } - pgtable_page_ctor(p); return p; } -- cgit v0.10.2 From 647f884e67dd920e01046a1ae87aa2c2d315479a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:44 -0800 Subject: um: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Jeff Dike Cc: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 7ddb64b..8636e90 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -279,8 +279,12 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) struct page *pte; pte = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); - if (pte) - pgtable_page_ctor(pte); + if (!pte) + return NULL; + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } return pte; } -- cgit v0.10.2 From e90bedbb8a847a0e6debe0e00b6f69b788fca92e Mon Sep 17 00:00:00 2001 From: "Kirill A. 
Shutemov" Date: Thu, 14 Nov 2013 14:31:46 -0800 Subject: unicore32: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Guan Xuetao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h index 0213e37..2e02d13 100644 --- a/arch/unicore32/include/asm/pgalloc.h +++ b/arch/unicore32/include/asm/pgalloc.h @@ -51,12 +51,14 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr) struct page *pte; pte = alloc_pages(PGALLOC_GFP, 0); - if (pte) { - if (!PageHighMem(pte)) { - void *page = page_address(pte); - clean_dcache_area(page, PTRS_PER_PTE * sizeof(pte_t)); - } - pgtable_page_ctor(pte); + if (!pte) + return NULL; + if (!PageHighMem(pte)) { + void *page = page_address(pte); + clean_dcache_area(page, PTRS_PER_PTE * sizeof(pte_t)); + } + if (!pgtable_page_ctor(pte)) { + __free_page(pte); } return pte; -- cgit v0.10.2 From cecbd1b5afb01ed6ee14e14234b2e8c52e26c841 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:47 -0800 Subject: x86: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 1a7d213..a7cccb6 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -25,8 +25,12 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) struct page *pte; pte = alloc_pages(__userpte_alloc_gfp, 0); - if (pte) - pgtable_page_ctor(pte); + if (!pte) + return NULL; + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } return pte; } -- cgit v0.10.2 From 8f43123d170ad9a6760e424af5bde7569fdd8c12 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:48 -0800 Subject: xtensa: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. 
Shutemov Cc: Chris Zankel Cc: Max Filippov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h index 037671a..b8774f1 100644 --- a/arch/xtensa/include/asm/pgalloc.h +++ b/arch/xtensa/include/asm/pgalloc.h @@ -58,7 +58,10 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, if (!pte) return NULL; page = virt_to_page(pte); - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + kmem_cache_free(pgtable_cache, pte); + return NULL; + } return page; } -- cgit v0.10.2 From 01058e70767de7b846588ef651c4d66e862f6823 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:49 -0800 Subject: iommu/arm-smmu: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Acked-by: Will Deacon Cc: Grant Likely Cc: Rob Herring Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 181c9ba..2349d62 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1212,7 +1212,10 @@ static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd, arm_smmu_flush_pgtable(smmu, page_address(table), ARM_SMMU_PTE_HWTABLE_SIZE); - pgtable_page_ctor(table); + if (!pgtable_page_ctor(table)) { + __free_page(table); + return -ENOMEM; + } pmd_populate(NULL, pmd, table); arm_smmu_flush_pgtable(smmu, pmd, sizeof(*pmd)); } -- cgit v0.10.2 From f820e2805c7acb157a78438d07e47f4fc57fe679 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:50 -0800 Subject: xtensa: use buddy allocator for PTE table At the moment xtensa uses slab allocator for PTE table. It doesn't work with enabled split page table lock: slab uses page->slab_cache and page->first_page for its pages. These fields share storage with page->ptl. Signed-off-by: Kirill A. 
Shutemov Cc: Chris Zankel Acked-by: Max Filippov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h index b8774f1..d38eb92 100644 --- a/arch/xtensa/include/asm/pgalloc.h +++ b/arch/xtensa/include/asm/pgalloc.h @@ -38,14 +38,18 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) free_page((unsigned long)pgd); } -/* Use a slab cache for the pte pages (see also sparc64 implementation) */ - -extern struct kmem_cache *pgtable_cache; - static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return kmem_cache_alloc(pgtable_cache, GFP_KERNEL|__GFP_REPEAT); + pte_t *ptep; + int i; + + ptep = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + if (!ptep) + return NULL; + for (i = 0; i < 1024; i++) + pte_clear(NULL, 0, ptep + i); + return ptep; } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, @@ -59,7 +63,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, return NULL; page = virt_to_page(pte); if (!pgtable_page_ctor(page)) { - kmem_cache_free(pgtable_cache, pte); + __free_page(page); return NULL; } return page; @@ -67,13 +71,13 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - kmem_cache_free(pgtable_cache, pte); + free_page((unsigned long)pte); } static inline void pte_free(struct mm_struct *mm, pgtable_t pte) { pgtable_page_dtor(pte); - kmem_cache_free(pgtable_cache, page_address(pte)); + __free_page(pte); } #define pmd_pgtable(pmd) pmd_page(pmd) diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 0fdf5d0..2164462 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -220,12 +220,11 @@ extern unsigned long empty_zero_page[1024]; #ifdef CONFIG_MMU extern pgd_t swapper_pg_dir[PAGE_SIZE/sizeof(pgd_t)]; extern void paging_init(void); -extern void 
pgtable_cache_init(void); #else # define swapper_pg_dir NULL static inline void paging_init(void) { } -static inline void pgtable_cache_init(void) { } #endif +static inline void pgtable_cache_init(void) { } /* * The pmd contains the kernel virtual address of the pte page. diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c index a107757..c43771c 100644 --- a/arch/xtensa/mm/mmu.c +++ b/arch/xtensa/mm/mmu.c @@ -50,23 +50,3 @@ void __init init_mmu(void) */ set_ptevaddr_register(PGTABLE_START); } - -struct kmem_cache *pgtable_cache __read_mostly; - -static void pgd_ctor(void *addr) -{ - pte_t *ptep = (pte_t *)addr; - int i; - - for (i = 0; i < 1024; i++, ptep++) - pte_clear(NULL, 0, ptep); - -} - -void __init pgtable_cache_init(void) -{ - pgtable_cache = kmem_cache_create("pgd", - PAGE_SIZE, PAGE_SIZE, - SLAB_HWCACHE_ALIGN, - pgd_ctor); -} -- cgit v0.10.2 From 49076ec2ccaf68610aa03d96bced9a6694b93ca1 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:51 -0800 Subject: mm: dynamically allocate page->ptl if it cannot be embedded to struct page If split page table lock is in use, we embed the lock into struct page of table's page. We have to disable split lock, if spinlock_t is too big to be embedded, like when DEBUG_SPINLOCK or DEBUG_LOCK_ALLOC is enabled. This patch adds support for dynamic allocation of split page table lock if we can't embed it to struct page. page->ptl is unsigned long now and we use it as spinlock_t if sizeof(spinlock_t) <= sizeof(long), otherwise it's a pointer to spinlock_t. The spinlock_t is allocated in pgtable_page_ctor() for PTE table and in pgtable_pmd_page_ctor() for PMD table. All other helpers are converted to support dynamically allocated page->ptl. Signed-off-by: Kirill A. 
Shutemov Reviewed-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/vm/split_page_table_lock b/Documentation/vm/split_page_table_lock new file mode 100644 index 0000000..7521d36 --- /dev/null +++ b/Documentation/vm/split_page_table_lock @@ -0,0 +1,94 @@ +Split page table lock +===================== + +Originally, mm->page_table_lock spinlock protected all page tables of the +mm_struct. But this approach leads to poor page fault scalability of +multi-threaded applications due to high contention on the lock. To improve +scalability, split page table lock was introduced. + +With split page table lock we have separate per-table lock to serialize +access to the table. At the moment we use split lock for PTE and PMD +tables. Access to higher level tables is protected by mm->page_table_lock. + +There are helpers to lock/unlock a table and other accessor functions: + - pte_offset_map_lock() + maps pte and takes PTE table lock, returns pointer to the taken + lock; + - pte_unmap_unlock() + unlocks and unmaps PTE table; + - pte_alloc_map_lock() + allocates PTE table if needed and takes the lock, returns pointer + to taken lock or NULL if allocation failed; + - pte_lockptr() + returns pointer to PTE table lock; + - pmd_lock() + takes PMD table lock, returns pointer to taken lock; + - pmd_lockptr() + returns pointer to PMD table lock; + +Split page table lock for PTE tables is enabled compile-time if +CONFIG_SPLIT_PTLOCK_CPUS (usually 4) is less or equal to NR_CPUS. +If split lock is disabled, all tables are guarded by mm->page_table_lock. + +Split page table lock for PMD tables is enabled, if it's enabled for PTE +tables and the architecture supports it (see below). + +Hugetlb and split page table lock +--------------------------------- + +Hugetlb can support several page sizes. We use split lock only for PMD +level, but not for PUD. 
+ +Hugetlb-specific helpers: + - huge_pte_lock() + takes pmd split lock for PMD_SIZE page, mm->page_table_lock + otherwise; + - huge_pte_lockptr() + returns pointer to table lock; + +Support of split page table lock by an architecture +--------------------------------------------------- + +There's no need in special enabling of PTE split page table lock: +everything required is done by pgtable_page_ctor() and pgtable_page_dtor(), +which must be called on PTE table allocation / freeing. + +Make sure the architecture doesn't use slab allocator for page table +allocation: slab uses page->slab_cache and page->first_page for its pages. +These fields share storage with page->ptl. + +PMD split lock only makes sense if you have more than two page table +levels. + +PMD split lock enabling requires pgtable_pmd_page_ctor() call on PMD table +allocation and pgtable_pmd_page_dtor() on freeing. + +Allocation usually happens in pmd_alloc_one(), freeing in pmd_free(), but +make sure you cover all PMD table allocation / freeing paths: e.g. X86_PAE +preallocates a few PMDs on pgd_alloc(). + +With everything in place you can set CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK. + +NOTE: pgtable_page_ctor() and pgtable_pmd_page_ctor() can fail -- it must +be handled properly. + +page->ptl +--------- + +page->ptl is used to access split page table lock, where 'page' is struct +page of page containing the table. It shares storage with page->private +(and few other fields in union). + +To avoid increasing size of struct page and have best performance, we use a +trick: + - if spinlock_t fits into long, we use page->ptl as spinlock, so we + can avoid indirect access and save a cache line. + - if size of spinlock_t is bigger than size of long, we use page->ptl as + pointer to spinlock_t and allocate it dynamically. 
This allows the use of + split lock with DEBUG_SPINLOCK or DEBUG_LOCK_ALLOC enabled, but costs + one more cache line for indirect access. + +The spinlock_t is allocated in pgtable_page_ctor() for PTE tables and in +pgtable_pmd_page_ctor() for PMD tables. + +Please, never access page->ptl directly -- use the appropriate helper. diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 455c873..49c962f 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -797,7 +797,7 @@ static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm) spinlock_t *ptl = NULL; #if USE_SPLIT_PTE_PTLOCKS - ptl = __pte_lockptr(page); + ptl = ptlock_ptr(page); spin_lock_nest_lock(ptl, &mm->page_table_lock); #endif diff --git a/include/linux/mm.h b/include/linux/mm.h index e855351..d033974 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1317,32 +1317,73 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ #if USE_SPLIT_PTE_PTLOCKS -/* - * We tuck a spinlock to guard each pagetable page into its struct page, - * at page->private, with BUILD_BUG_ON to make sure that this will not - * overflow into the next struct page (as it might with DEBUG_SPINLOCK). - * When freeing, reset page->mapping so free_pages_check won't complain.
- */ -#define __pte_lockptr(page) &((page)->ptl) -#define pte_lock_init(_page) do { \ - spin_lock_init(__pte_lockptr(_page)); \ -} while (0) -#define pte_lock_deinit(page) ((page)->mapping = NULL) -#define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));}) +bool __ptlock_alloc(struct page *page); +void __ptlock_free(struct page *page); +static inline bool ptlock_alloc(struct page *page) +{ + if (sizeof(spinlock_t) > sizeof(page->ptl)) + return __ptlock_alloc(page); + return true; +} +static inline void ptlock_free(struct page *page) +{ + if (sizeof(spinlock_t) > sizeof(page->ptl)) + __ptlock_free(page); +} + +static inline spinlock_t *ptlock_ptr(struct page *page) +{ + if (sizeof(spinlock_t) > sizeof(page->ptl)) + return (spinlock_t *) page->ptl; + else + return (spinlock_t *) &page->ptl; +} + +static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) +{ + return ptlock_ptr(pmd_page(*pmd)); +} + +static inline bool ptlock_init(struct page *page) +{ + /* + * prep_new_page() initialize page->private (and therefore page->ptl) + * with 0. Make sure nobody took it in use in between. + * + * It can happen if arch try to use slab for page table allocation: + * slab code uses page->slab_cache and page->first_page (for tail + * pages), which share storage with page->ptl. + */ + VM_BUG_ON(page->ptl); + if (!ptlock_alloc(page)) + return false; + spin_lock_init(ptlock_ptr(page)); + return true; +} + +/* Reset page->mapping so free_pages_check won't complain. */ +static inline void pte_lock_deinit(struct page *page) +{ + page->mapping = NULL; + ptlock_free(page); +} + #else /* !USE_SPLIT_PTE_PTLOCKS */ /* * We use mm->page_table_lock to guard all pagetable pages of the mm. 
*/ -#define pte_lock_init(page) do {} while (0) -#define pte_lock_deinit(page) do {} while (0) -#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) +static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) +{ + return &mm->page_table_lock; +} +static inline bool ptlock_init(struct page *page) { return true; } +static inline void pte_lock_deinit(struct page *page) {} #endif /* USE_SPLIT_PTE_PTLOCKS */ static inline bool pgtable_page_ctor(struct page *page) { - pte_lock_init(page); inc_zone_page_state(page, NR_PAGETABLE); - return true; + return ptlock_init(page); } static inline void pgtable_page_dtor(struct page *page) @@ -1383,16 +1424,15 @@ static inline void pgtable_page_dtor(struct page *page) static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) { - return &virt_to_page(pmd)->ptl; + return ptlock_ptr(virt_to_page(pmd)); } static inline bool pgtable_pmd_page_ctor(struct page *page) { - spin_lock_init(&page->ptl); #ifdef CONFIG_TRANSPARENT_HUGEPAGE page->pmd_huge_pte = NULL; #endif - return true; + return ptlock_init(page); } static inline void pgtable_pmd_page_dtor(struct page *page) @@ -1400,6 +1440,7 @@ static inline void pgtable_pmd_page_dtor(struct page *page) #ifdef CONFIG_TRANSPARENT_HUGEPAGE VM_BUG_ON(page->pmd_huge_pte); #endif + ptlock_free(page); } #define pmd_huge_pte(mm, pmd) (virt_to_page(pmd)->pmd_huge_pte) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9342610..423da79 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -147,7 +147,10 @@ struct page { * system if PG_buddy is set. */ #if USE_SPLIT_PTE_PTLOCKS - spinlock_t ptl; + unsigned long ptl; /* It's spinlock_t if it fits to long, + * otherwise it's pointer to dynamicaly + * allocated spinlock_t. 
+ */ #endif struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */ struct page *first_page; /* Compound tail pages */ diff --git a/mm/Kconfig b/mm/Kconfig index 7aa02de..de31af2 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -218,8 +218,6 @@ config SPLIT_PTLOCK_CPUS int default "999999" if ARM && !CPU_CACHE_VIPT default "999999" if PARISC && !PA20 - default "999999" if DEBUG_SPINLOCK || DEBUG_LOCK_ALLOC - default "999999" if !64BIT && GENERIC_LOCKBREAK default "4" config ARCH_ENABLE_SPLIT_PMD_PTLOCK diff --git a/mm/memory.c b/mm/memory.c index d05c6b1..24ffae2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4270,3 +4270,22 @@ void copy_user_huge_page(struct page *dst, struct page *src, } } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ + +#if USE_SPLIT_PTE_PTLOCKS +bool __ptlock_alloc(struct page *page) +{ + spinlock_t *ptl; + + ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL); + if (!ptl) + return false; + page->ptl = (unsigned long)ptl; + return true; +} + +void __ptlock_free(struct page *page) +{ + if (sizeof(spinlock_t) > sizeof(page->ptl)) + kfree((spinlock_t *)page->ptl); +} +#endif -- cgit v0.10.2 From 539edb5846c740d78a8b6c2e43a99ca4323df68f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Nov 2013 14:31:52 -0800 Subject: mm: properly separate the bloated ptl from the regular case Use kernel/bounds.c to convert build-time spinlock_t size check into a preprocessor symbol and apply that to properly separate the page::ptl situation. Signed-off-by: Peter Zijlstra Signed-off-by: Kirill A. 
Shutemov Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mm.h b/include/linux/mm.h index d033974..1cedd00 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1317,27 +1317,29 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ #if USE_SPLIT_PTE_PTLOCKS -bool __ptlock_alloc(struct page *page); -void __ptlock_free(struct page *page); +#if BLOATED_SPINLOCKS +extern bool ptlock_alloc(struct page *page); +extern void ptlock_free(struct page *page); + +static inline spinlock_t *ptlock_ptr(struct page *page) +{ + return page->ptl; +} +#else /* BLOATED_SPINLOCKS */ static inline bool ptlock_alloc(struct page *page) { - if (sizeof(spinlock_t) > sizeof(page->ptl)) - return __ptlock_alloc(page); return true; } + static inline void ptlock_free(struct page *page) { - if (sizeof(spinlock_t) > sizeof(page->ptl)) - __ptlock_free(page); } static inline spinlock_t *ptlock_ptr(struct page *page) { - if (sizeof(spinlock_t) > sizeof(page->ptl)) - return (spinlock_t *) page->ptl; - else - return (spinlock_t *) &page->ptl; + return &page->ptl; } +#endif /* BLOATED_SPINLOCKS */ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) { @@ -1354,7 +1356,7 @@ static inline bool ptlock_init(struct page *page) * slab code uses page->slab_cache and page->first_page (for tail * pages), which share storage with page->ptl. */ - VM_BUG_ON(page->ptl); + VM_BUG_ON(*(unsigned long *)&page->ptl); if (!ptlock_alloc(page)) return false; spin_lock_init(ptlock_ptr(page)); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 423da79..10f5a72 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -147,10 +147,11 @@ struct page { * system if PG_buddy is set. */ #if USE_SPLIT_PTE_PTLOCKS - unsigned long ptl; /* It's spinlock_t if it fits to long, - * otherwise it's pointer to dynamicaly - * allocated spinlock_t. 
- */ +#if BLOATED_SPINLOCKS + spinlock_t *ptl; +#else + spinlock_t ptl; +#endif #endif struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */ struct page *first_page; /* Compound tail pages */ diff --git a/kernel/bounds.c b/kernel/bounds.c index e8ca97b..578782e 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -11,6 +11,7 @@ #include #include #include +#include void foo(void) { @@ -21,5 +22,6 @@ void foo(void) #ifdef CONFIG_SMP DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS)); #endif + DEFINE(BLOATED_SPINLOCKS, sizeof(spinlock_t) > sizeof(int)); /* End of constants */ } diff --git a/mm/memory.c b/mm/memory.c index 24ffae2..5d9025f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4271,21 +4271,20 @@ void copy_user_huge_page(struct page *dst, struct page *src, } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ -#if USE_SPLIT_PTE_PTLOCKS -bool __ptlock_alloc(struct page *page) +#if USE_SPLIT_PTE_PTLOCKS && BLOATED_SPINLOCKS +bool ptlock_alloc(struct page *page) { spinlock_t *ptl; ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL); if (!ptl) return false; - page->ptl = (unsigned long)ptl; + page->ptl = ptl; return true; } -void __ptlock_free(struct page *page) +void ptlock_free(struct page *page) { - if (sizeof(spinlock_t) > sizeof(page->ptl)) - kfree((spinlock_t *)page->ptl); + kfree(page->ptl); } #endif -- cgit v0.10.2 From ea1e7ed33708c7a760419ff9ded0a6cb90586a50 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:53 -0800 Subject: mm: create a separate slab for page->ptl allocation If DEBUG_SPINLOCK and DEBUG_LOCK_ALLOC are enabled spinlock_t on x86_64 is 72 bytes. For page->ptl they will be allocated from kmalloc-96 slab, so we lose 24 bytes on each. An average system can easily allocate a few tens of thousands of page->ptl and the overhead is significant. Let's create a separate slab for page->ptl allocation to solve this. Signed-off-by: Kirill A.
Shutemov Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mm.h b/include/linux/mm.h index 1cedd00..0548eb2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1318,6 +1318,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a #if USE_SPLIT_PTE_PTLOCKS #if BLOATED_SPINLOCKS +void __init ptlock_cache_init(void); extern bool ptlock_alloc(struct page *page); extern void ptlock_free(struct page *page); @@ -1326,6 +1327,7 @@ static inline spinlock_t *ptlock_ptr(struct page *page) return page->ptl; } #else /* BLOATED_SPINLOCKS */ +static inline void ptlock_cache_init(void) {} static inline bool ptlock_alloc(struct page *page) { return true; @@ -1378,10 +1380,17 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) { return &mm->page_table_lock; } +static inline void ptlock_cache_init(void) {} static inline bool ptlock_init(struct page *page) { return true; } static inline void pte_lock_deinit(struct page *page) {} #endif /* USE_SPLIT_PTE_PTLOCKS */ +static inline void pgtable_init(void) +{ + ptlock_cache_init(); + pgtable_cache_init(); +} + static inline bool pgtable_page_ctor(struct page *page) { inc_zone_page_state(page, NR_PAGETABLE); diff --git a/init/main.c b/init/main.c index 6ad1a53..5f19113 100644 --- a/init/main.c +++ b/init/main.c @@ -473,7 +473,7 @@ static void __init mm_init(void) mem_init(); kmem_cache_init(); percpu_init_late(); - pgtable_cache_init(); + pgtable_init(); vmalloc_init(); } diff --git a/mm/memory.c b/mm/memory.c index 5d9025f..0409e8f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4272,6 +4272,13 @@ void copy_user_huge_page(struct page *dst, struct page *src, #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ #if USE_SPLIT_PTE_PTLOCKS && BLOATED_SPINLOCKS +static struct kmem_cache *page_ptl_cachep; +void __init ptlock_cache_init(void) +{ + page_ptl_cachep = kmem_cache_create("page->ptl", 
sizeof(spinlock_t), 0, + SLAB_PANIC, NULL); +} + bool ptlock_alloc(struct page *page) { spinlock_t *ptl; -- cgit v0.10.2 From 57f4257eae33e036125973858934730250d464e3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Nov 2013 14:31:54 -0800 Subject: lockref: use BLOATED_SPINLOCKS to avoid explicit config dependencies Avoid the fragile Kconfig construct guestimating spinlock_t sizes; use a friendly compile-time test to determine this. [kirill.shutemov@linux.intel.com: drop CONFIG_CMPXCHG_LOCKREF] Signed-off-by: Peter Zijlstra Signed-off-by: Kirill A. Shutemov Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/lockref.h b/include/linux/lockref.h index 13dfd36..c8929c3 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -15,10 +15,15 @@ */ #include +#include + +#define USE_CMPXCHG_LOCKREF \ + (IS_ENABLED(CONFIG_ARCH_USE_CMPXCHG_LOCKREF) && \ + IS_ENABLED(CONFIG_SMP) && !BLOATED_SPINLOCKS) struct lockref { union { -#ifdef CONFIG_CMPXCHG_LOCKREF +#if USE_CMPXCHG_LOCKREF aligned_u64 lock_count; #endif struct { diff --git a/lib/Kconfig b/lib/Kconfig index 75485e1..06dc742 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -51,13 +51,6 @@ config PERCPU_RWSEM config ARCH_USE_CMPXCHG_LOCKREF bool -config CMPXCHG_LOCKREF - def_bool y if ARCH_USE_CMPXCHG_LOCKREF - depends on SMP - depends on !GENERIC_LOCKBREAK - depends on !DEBUG_SPINLOCK - depends on !DEBUG_LOCK_ALLOC - config CRC_CCITT tristate "CRC-CCITT functions" help diff --git a/lib/lockref.c b/lib/lockref.c index af6e95d..d2b123f 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -1,7 +1,7 @@ #include #include -#ifdef CONFIG_CMPXCHG_LOCKREF +#if USE_CMPXCHG_LOCKREF /* * Allow weakly-ordered memory architectures to provide barrier-less -- cgit v0.10.2 From 839cc2a94cc3665bafe32203c2f095f4dd470a80 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 14 Nov 2013 14:31:56 -0800 Subject: seq_file: introduce seq_setwidth() and seq_pad() There are 
several users who want to know bytes written by seq_*() for alignment purpose. Currently they are using %n format for knowing it because seq_*() returns 0 on success. This patch introduces seq_setwidth() and seq_pad() for allowing them to align without using %n format. Signed-off-by: Tetsuo Handa Signed-off-by: Kees Cook Cc: Joe Perches Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/seq_file.c b/fs/seq_file.c index a290157..1cd2388 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -766,6 +766,21 @@ int seq_write(struct seq_file *seq, const void *data, size_t len) } EXPORT_SYMBOL(seq_write); +/** + * seq_pad - write padding spaces to buffer + * @m: seq_file identifying the buffer to which data should be written + * @c: the byte to append after padding if non-zero + */ +void seq_pad(struct seq_file *m, char c) +{ + int size = m->pad_until - m->count; + if (size > 0) + seq_printf(m, "%*s", size, ""); + if (c) + seq_putc(m, c); +} +EXPORT_SYMBOL(seq_pad); + struct list_head *seq_list_start(struct list_head *head, loff_t pos) { struct list_head *lh; diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 4e32edc..52e0097 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -20,6 +20,7 @@ struct seq_file { size_t size; size_t from; size_t count; + size_t pad_until; loff_t index; loff_t read_pos; u64 version; @@ -79,6 +80,20 @@ static inline void seq_commit(struct seq_file *m, int num) } } +/** + * seq_setwidth - set padding width + * @m: the seq_file handle + * @size: the max number of bytes to pad. + * + * Call seq_setwidth() for setting max width, then call seq_printf() etc. and + * finally call seq_pad() to pad the remaining bytes. 
+ */ +static inline void seq_setwidth(struct seq_file *m, size_t size) +{ + m->pad_until = m->count + size; +} +void seq_pad(struct seq_file *m, char c); + char *mangle_path(char *s, const char *p, const char *esc); int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); -- cgit v0.10.2 From 652586df95e5d76b37d07a11839126dcfede1621 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 14 Nov 2013 14:31:57 -0800 Subject: seq_file: remove "%n" usage from seq_file users All seq_printf() users are using "%n" for calculating padding size, convert them to use seq_setwidth() / seq_pad() pair. Signed-off-by: Tetsuo Handa Signed-off-by: Kees Cook Cc: Joe Perches Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c index b701eaa..51942d5 100644 --- a/fs/proc/consoles.c +++ b/fs/proc/consoles.c @@ -29,7 +29,6 @@ static int show_console_dev(struct seq_file *m, void *v) char flags[ARRAY_SIZE(con_flags) + 1]; struct console *con = v; unsigned int a; - int len; dev_t dev = 0; if (con->device) { @@ -47,11 +46,10 @@ static int show_console_dev(struct seq_file *m, void *v) con_flags[a].name : ' '; flags[a] = 0; - seq_printf(m, "%s%d%n", con->name, con->index, &len); - len = 21 - len; - if (len < 1) - len = 1; - seq_printf(m, "%*c%c%c%c (%s)", len, ' ', con->read ? 'R' : '-', + seq_setwidth(m, 21 - 1); + seq_printf(m, "%s%d", con->name, con->index); + seq_pad(m, ' '); + seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-', con->write ? 'W' : '-', con->unblank ? 
'U' : '-', flags); if (dev) diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index ccfd99b..5f9bc8a 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -39,7 +39,7 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) unsigned long ino = 0; struct file *file; dev_t dev = 0; - int flags, len; + int flags; flags = region->vm_flags; file = region->vm_file; @@ -50,8 +50,9 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) ino = inode->i_ino; } + seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); seq_printf(m, - "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", + "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", region->vm_start, region->vm_end, flags & VM_READ ? 'r' : '-', @@ -59,13 +60,10 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', ((loff_t)region->vm_pgoff) << PAGE_SHIFT, - MAJOR(dev), MINOR(dev), ino, &len); + MAJOR(dev), MINOR(dev), ino); if (file) { - len = 25 + sizeof(void *) * 6 - len; - if (len < 1) - len = 1; - seq_printf(m, "%*c", len, ' '); + seq_pad(m, ' '); seq_path(m, &file->f_path, ""); } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 42b5cf5..fb52b54 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -84,14 +84,6 @@ unsigned long task_statm(struct mm_struct *mm, return mm->total_vm; } -static void pad_len_spaces(struct seq_file *m, int len) -{ - len = 25 + sizeof(void*) * 6 - len; - if (len < 1) - len = 1; - seq_printf(m, "%*c", len, ' '); -} - #ifdef CONFIG_NUMA /* * These functions are for numa_maps but called in generic **maps seq_file @@ -269,7 +261,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) unsigned long long pgoff = 0; unsigned long start, end; dev_t dev = 0; - int len; const char *name = NULL; if (file) { @@ -287,7 +278,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) if (stack_guard_page_end(vma, end)) 
end -= PAGE_SIZE; - seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", + seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); + seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", start, end, flags & VM_READ ? 'r' : '-', @@ -295,14 +287,14 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? 's' : 'p', pgoff, - MAJOR(dev), MINOR(dev), ino, &len); + MAJOR(dev), MINOR(dev), ino); /* * Print the dentry name for named mappings, and a * special [heap] marker for the heap: */ if (file) { - pad_len_spaces(m, len); + seq_pad(m, ' '); seq_path(m, &file->f_path, "\n"); goto done; } @@ -334,7 +326,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) name = "[stack]"; } else { /* Thread stack in /proc/PID/maps */ - pad_len_spaces(m, len); + seq_pad(m, ' '); seq_printf(m, "[stack:%d]", tid); } } @@ -342,7 +334,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) done: if (name) { - pad_len_spaces(m, len); + seq_pad(m, ' '); seq_puts(m, name); } seq_putc(m, '\n'); diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 56123a6..678455d 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -123,14 +123,6 @@ unsigned long task_statm(struct mm_struct *mm, return size; } -static void pad_len_spaces(struct seq_file *m, int len) -{ - len = 25 + sizeof(void*) * 6 - len; - if (len < 1) - len = 1; - seq_printf(m, "%*c", len, ' '); -} - /* * display a single VMA to a sequenced file */ @@ -142,7 +134,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, unsigned long ino = 0; struct file *file; dev_t dev = 0; - int flags, len; + int flags; unsigned long long pgoff = 0; flags = vma->vm_flags; @@ -155,8 +147,9 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT; } + seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); seq_printf(m, - "%08lx-%08lx %c%c%c%c 
%08llx %02x:%02x %lu %n", + "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", vma->vm_start, vma->vm_end, flags & VM_READ ? 'r' : '-', @@ -164,16 +157,16 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', pgoff, - MAJOR(dev), MINOR(dev), ino, &len); + MAJOR(dev), MINOR(dev), ino); if (file) { - pad_len_spaces(m, len); + seq_pad(m, ' '); seq_path(m, &file->f_path, ""); } else if (mm) { pid_t tid = vm_is_stack(priv->task, vma, is_pid); if (tid != 0) { - pad_len_spaces(m, len); + seq_pad(m, ' '); /* * Thread stack in /proc/PID/task/TID/maps or * the main process stack. diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ec9a9ef..5afeb5a 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2523,16 +2523,17 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) list_for_each_entry_rcu(fa, &li->falh, fa_list) { const struct fib_info *fi = fa->fa_info; unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi); - int len; if (fa->fa_type == RTN_BROADCAST || fa->fa_type == RTN_MULTICAST) continue; + seq_setwidth(seq, 127); + if (fi) seq_printf(seq, "%s\t%08X\t%08X\t%04X\t%d\t%u\t" - "%d\t%08X\t%d\t%u\t%u%n", + "%d\t%08X\t%d\t%u\t%u", fi->fib_dev ? fi->fib_dev->name : "*", prefix, fi->fib_nh->nh_gw, flags, 0, 0, @@ -2541,15 +2542,15 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) (fi->fib_advmss ? 
fi->fib_advmss + 40 : 0), fi->fib_window, - fi->fib_rtt >> 3, &len); + fi->fib_rtt >> 3); else seq_printf(seq, "*\t%08X\t%08X\t%04X\t%d\t%u\t" - "%d\t%08X\t%d\t%u\t%u%n", + "%d\t%08X\t%d\t%u\t%u", prefix, 0, flags, 0, 0, 0, - mask, 0, 0, 0, &len); + mask, 0, 0, 0); - seq_printf(seq, "%*s\n", 127 - len, ""); + seq_pad(seq, '\n'); } } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 9afbdb1..cbc85f6 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -1076,7 +1076,7 @@ void ping_seq_stop(struct seq_file *seq, void *v) EXPORT_SYMBOL_GPL(ping_seq_stop); static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, - int bucket, int *len) + int bucket) { struct inet_sock *inet = inet_sk(sp); __be32 dest = inet->inet_daddr; @@ -1085,7 +1085,7 @@ static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, __u16 srcp = ntohs(inet->inet_sport); seq_printf(f, "%5d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d%n", + " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d", bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), @@ -1093,23 +1093,22 @@ static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, - atomic_read(&sp->sk_drops), len); + atomic_read(&sp->sk_drops)); } static int ping_v4_seq_show(struct seq_file *seq, void *v) { + seq_setwidth(seq, 127); if (v == SEQ_START_TOKEN) - seq_printf(seq, "%-127s\n", - " sl local_address rem_address st tx_queue " + seq_puts(seq, " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " "inode ref pointer drops"); else { struct ping_iter_state *state = seq->private; - int len; - ping_v4_format_sock(v, seq, state->bucket, &len); - seq_printf(seq, "%*s\n", 127 - len, ""); + ping_v4_format_sock(v, seq, state->bucket); } + seq_pad(seq, '\n'); return 0; } diff --git a/net/ipv4/tcp_ipv4.c 
b/net/ipv4/tcp_ipv4.c index 14bba8a..59a6f8b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2541,13 +2541,13 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) EXPORT_SYMBOL(tcp_proc_unregister); static void get_openreq4(const struct sock *sk, const struct request_sock *req, - struct seq_file *f, int i, kuid_t uid, int *len) + struct seq_file *f, int i, kuid_t uid) { const struct inet_request_sock *ireq = inet_rsk(req); long delta = req->expires - jiffies; seq_printf(f, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK%n", + " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK", i, ireq->ir_loc_addr, ntohs(inet_sk(sk)->inet_sport), @@ -2562,11 +2562,10 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, 0, /* non standard timer */ 0, /* open_requests have no inode */ atomic_read(&sk->sk_refcnt), - req, - len); + req); } -static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) +static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) { int timer_active; unsigned long timer_expires; @@ -2605,7 +2604,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " - "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d%n", + "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", i, src, srcp, dest, destp, sk->sk_state, tp->write_seq - tp->snd_una, rx_queue, @@ -2622,12 +2621,11 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) tp->snd_cwnd, sk->sk_state == TCP_LISTEN ? (fastopenq ? fastopenq->max_qlen : 0) : - (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh), - len); + (tcp_in_initial_slowstart(tp) ? 
-1 : tp->snd_ssthresh)); } static void get_timewait4_sock(const struct inet_timewait_sock *tw, - struct seq_file *f, int i, int *len) + struct seq_file *f, int i) { __be32 dest, src; __u16 destp, srcp; @@ -2639,10 +2637,10 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, srcp = ntohs(tw->tw_sport); seq_printf(f, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n", + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK", i, src, srcp, dest, destp, tw->tw_substate, 0, 0, 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, - atomic_read(&tw->tw_refcnt), tw, len); + atomic_read(&tw->tw_refcnt), tw); } #define TMPSZ 150 @@ -2651,11 +2649,10 @@ static int tcp4_seq_show(struct seq_file *seq, void *v) { struct tcp_iter_state *st; struct sock *sk = v; - int len; + seq_setwidth(seq, TMPSZ - 1); if (v == SEQ_START_TOKEN) { - seq_printf(seq, "%-*s\n", TMPSZ - 1, - " sl local_address rem_address st tx_queue " + seq_puts(seq, " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " "inode"); goto out; @@ -2666,16 +2663,16 @@ static int tcp4_seq_show(struct seq_file *seq, void *v) case TCP_SEQ_STATE_LISTENING: case TCP_SEQ_STATE_ESTABLISHED: if (sk->sk_state == TCP_TIME_WAIT) - get_timewait4_sock(v, seq, st->num, &len); + get_timewait4_sock(v, seq, st->num); else - get_tcp4_sock(v, seq, st->num, &len); + get_tcp4_sock(v, seq, st->num); break; case TCP_SEQ_STATE_OPENREQ: - get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len); + get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid); break; } - seq_printf(seq, "%*s\n", TMPSZ - 1 - len, ""); out: + seq_pad(seq, '\n'); return 0; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 89909dd..de86e5b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2331,7 +2331,7 @@ EXPORT_SYMBOL(udp_proc_unregister); /* ------------------------------------------------------------------------ */ static void udp4_format_sock(struct sock *sp, struct seq_file *f, 
- int bucket, int *len) + int bucket) { struct inet_sock *inet = inet_sk(sp); __be32 dest = inet->inet_daddr; @@ -2340,7 +2340,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, __u16 srcp = ntohs(inet->inet_sport); seq_printf(f, "%5d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d%n", + " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d", bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), @@ -2348,23 +2348,22 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, - atomic_read(&sp->sk_drops), len); + atomic_read(&sp->sk_drops)); } int udp4_seq_show(struct seq_file *seq, void *v) { + seq_setwidth(seq, 127); if (v == SEQ_START_TOKEN) - seq_printf(seq, "%-127s\n", - " sl local_address rem_address st tx_queue " + seq_puts(seq, " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " "inode ref pointer drops"); else { struct udp_iter_state *state = seq->private; - int len; - udp4_format_sock(v, seq, state->bucket, &len); - seq_printf(seq, "%*s\n", 127 - len, ""); + udp4_format_sock(v, seq, state->bucket); } + seq_pad(seq, '\n'); return 0; } diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 77e38f7..008214a 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -595,26 +595,25 @@ static void pn_sock_seq_stop(struct seq_file *seq, void *v) static int pn_sock_seq_show(struct seq_file *seq, void *v) { - int len; - + seq_setwidth(seq, 127); if (v == SEQ_START_TOKEN) - seq_printf(seq, "%s%n", "pt loc rem rs st tx_queue rx_queue " - " uid inode ref pointer drops", &len); + seq_puts(seq, "pt loc rem rs st tx_queue rx_queue " + " uid inode ref pointer drops"); else { struct sock *sk = v; struct pn_sock *pn = pn_sk(sk); seq_printf(seq, "%2d %04X:%04X:%02X %02X %08X:%08X %5d %lu " - "%d %pK %d%n", + "%d %pK %d", 
sk->sk_protocol, pn->sobject, pn->dobject, pn->resource, sk->sk_state, sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk), from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), sock_i_ino(sk), atomic_read(&sk->sk_refcnt), sk, - atomic_read(&sk->sk_drops), &len); + atomic_read(&sk->sk_drops)); } - seq_printf(seq, "%*s\n", 127 - len, ""); + seq_pad(seq, '\n'); return 0; } @@ -785,20 +784,19 @@ static void pn_res_seq_stop(struct seq_file *seq, void *v) static int pn_res_seq_show(struct seq_file *seq, void *v) { - int len; - + seq_setwidth(seq, 63); if (v == SEQ_START_TOKEN) - seq_printf(seq, "%s%n", "rs uid inode", &len); + seq_puts(seq, "rs uid inode"); else { struct sock **psk = v; struct sock *sk = *psk; - seq_printf(seq, "%02X %5u %lu%n", + seq_printf(seq, "%02X %5u %lu", (int) (psk - pnres.sk), from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), - sock_i_ino(sk), &len); + sock_i_ino(sk)); } - seq_printf(seq, "%*s\n", 63 - len, ""); + seq_pad(seq, '\n'); return 0; } diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index 5ea573b..647396b 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -79,12 +79,13 @@ static sctp_dbg_objcnt_entry_t sctp_dbg_objcnt[] = { */ static int sctp_objcnt_seq_show(struct seq_file *seq, void *v) { - int i, len; + int i; i = (int)*(loff_t *)v; - seq_printf(seq, "%s: %d%n", sctp_dbg_objcnt[i].label, - atomic_read(sctp_dbg_objcnt[i].counter), &len); - seq_printf(seq, "%*s\n", 127 - len, ""); + seq_setwidth(seq, 127); + seq_printf(seq, "%s: %d", sctp_dbg_objcnt[i].label, + atomic_read(sctp_dbg_objcnt[i].counter)); + seq_pad(seq, '\n'); return 0; } -- cgit v0.10.2 From 9196436ab2f713b823a2ba2024cb69f40b2f54a5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 14 Nov 2013 14:31:58 -0800 Subject: vsprintf: ignore %n again This ignores %n in printf again, as was originally documented. Implementing %n poses a greater security risk than utility, so it should stay ignored. 
To help anyone attempting to use %n, a warning will be emitted if it is encountered. Based on an earlier patch by Joe Perches. Because %n was designed to write to pointers on the stack, it has been frequently used as an attack vector when bugs are found that leak user-controlled strings into functions that ultimately process format strings. While this class of bug can still be turned into an information leak, removing %n eliminates the common method of elevating such a bug into an arbitrary kernel memory writing primitive, significantly reducing the danger of this class of bug. For seq_file users that need to know the length of a written string for padding, please see seq_setwidth() and seq_pad() instead. Signed-off-by: Kees Cook Cc: Joe Perches Cc: Tetsuo Handa Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 48586ac..10909c5 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1712,18 +1712,16 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) break; case FORMAT_TYPE_NRCHARS: { - u8 qualifier = spec.qualifier; + /* + * Since %n poses a greater security risk than + * utility, ignore %n and skip its argument. + */ + void *skip_arg; - if (qualifier == 'l') { - long *ip = va_arg(args, long *); - *ip = (str - buf); - } else if (_tolower(qualifier) == 'z') { - size_t *ip = va_arg(args, size_t *); - *ip = (str - buf); - } else { - int *ip = va_arg(args, int *); - *ip = (str - buf); - } + WARN_ONCE(1, "Please remove ignored %%n in '%s'\n", + old_fmt); + + skip_arg = va_arg(args, void *); break; } -- cgit v0.10.2 From d5ceede8dc86278d16dcad8f916ef323b5672bd8 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Thu, 14 Nov 2013 14:31:59 -0800 Subject: drivers/rtc/rtc-hid-sensor-time.c: use dev_get_platdata() Use the wrapper function for retrieving the platform data instead of accessing dev->platform_data directly. 
This is a cosmetic change to make the code simpler and enhance the readability. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c index 45560ff..1ba3690 100644 --- a/drivers/rtc/rtc-hid-sensor-time.c +++ b/drivers/rtc/rtc-hid-sensor-time.c @@ -236,7 +236,7 @@ static const struct rtc_class_ops hid_time_rtc_ops = { static int hid_time_probe(struct platform_device *pdev) { int ret = 0; - struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data; + struct hid_sensor_hub_device *hsdev = dev_get_platdata(&pdev->dev); struct hid_time_state *time_state = devm_kzalloc(&pdev->dev, sizeof(struct hid_time_state), GFP_KERNEL); @@ -303,7 +303,7 @@ err_open: static int hid_time_remove(struct platform_device *pdev) { - struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data; + struct hid_sensor_hub_device *hsdev = dev_get_platdata(&pdev->dev); sensor_hub_device_close(hsdev); sensor_hub_remove_callback(hsdev, HID_USAGE_SENSOR_TIME); -- cgit v0.10.2 From 406bf31893163cbe5b0b03a281685c7dc95c9380 Mon Sep 17 00:00:00 2001 From: Alexander Holler Date: Thu, 14 Nov 2013 14:32:00 -0800 Subject: drivers/rtc/rtc-hid-sensor-time.c: enable HID input processing early Enable the processing of HID input records before the RTC will be registered, in order to allow the RTC register function to read clock. Without doing that the clock can only be read after the probe function has finished. 
Signed-off-by: Alexander Holler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c index 1ba3690..a34e5cf 100644 --- a/drivers/rtc/rtc-hid-sensor-time.c +++ b/drivers/rtc/rtc-hid-sensor-time.c @@ -281,11 +281,18 @@ static int hid_time_probe(struct platform_device *pdev) goto err_open; } + /* + * Enable HID input processing early in order to be able to read the + * clock already in devm_rtc_device_register(). + */ + hid_device_io_start(hsdev->hdev); + time_state->rtc = devm_rtc_device_register(&pdev->dev, "hid-sensor-time", &hid_time_rtc_ops, THIS_MODULE); if (IS_ERR_OR_NULL(time_state->rtc)) { + hid_device_io_stop(hsdev->hdev); ret = time_state->rtc ? PTR_ERR(time_state->rtc) : -ENODEV; time_state->rtc = NULL; dev_err(&pdev->dev, "rtc device register failed!\n"); -- cgit v0.10.2 From c32f74ab2872994bc8336ed367313da3139350ca Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 14 Nov 2013 14:32:01 -0800 Subject: sched: replace INIT_COMPLETION with reinit_completion For the casual device driver writer, it is hard to remember when to use init_completion (to init a completion structure) or INIT_COMPLETION (to *reinit* a completion structure). Furthermore, while all other completion functions expect a pointer as a parameter, INIT_COMPLETION does not. To make it easier to remember which function to use and to make code more readable, introduce a new inline function with the proper name and consistent argument type. Update the kernel-doc for init_completion while we are here.
Signed-off-by: Wolfram Sang Acked-by: Linus Walleij (personally at LCE13) Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/completion.h b/include/linux/completion.h index 22c33e3..124e4b4 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -19,8 +19,8 @@ * * See also: complete(), wait_for_completion() (and friends _timeout, * _interruptible, _interruptible_timeout, and _killable), init_completion(), - * and macros DECLARE_COMPLETION(), DECLARE_COMPLETION_ONSTACK(), and - * INIT_COMPLETION(). + * reinit_completion(), and macros DECLARE_COMPLETION(), + * DECLARE_COMPLETION_ONSTACK(). */ struct completion { unsigned int done; @@ -65,7 +65,7 @@ struct completion { /** * init_completion - Initialize a dynamically allocated completion - * @x: completion structure that is to be initialized + * @x: pointer to completion structure that is to be initialized * * This inline function will initialize a dynamically created completion * structure. @@ -76,6 +76,18 @@ static inline void init_completion(struct completion *x) init_waitqueue_head(&x->wait); } +/** + * reinit_completion - reinitialize a completion structure + * @x: pointer to completion structure that is to be reinitialized + * + * This inline function should be used to reinitialize a completion structure so it can + * be reused. This is especially important after complete_all() is used. 
+ */ +static inline void reinit_completion(struct completion *x) +{ + x->done = 0; +} + extern void wait_for_completion(struct completion *); extern void wait_for_completion_io(struct completion *); extern int wait_for_completion_interruptible(struct completion *x); -- cgit v0.10.2 From 16735d022f72b20ddbb2274b8e109f69575e9b2b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 14 Nov 2013 14:32:02 -0800 Subject: tree-wide: use reinit_completion instead of INIT_COMPLETION Use this new function to make code more comprehensible, since we are reinitializing the completion, not initializing. [akpm@linux-foundation.org: linux-next resyncs] Signed-off-by: Wolfram Sang Acked-by: Linus Walleij (personally at LCE13) Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/arm/mach-tegra/apbio.c b/arch/arm/mach-tegra/apbio.c index d7aa52e..bc47197 100644 --- a/arch/arm/mach-tegra/apbio.c +++ b/arch/arm/mach-tegra/apbio.c @@ -114,7 +114,7 @@ static int do_dma_transfer(unsigned long apb_add, dma_desc->callback = apb_dma_complete; dma_desc->callback_param = NULL; - INIT_COMPLETION(tegra_apb_wait); + reinit_completion(&tegra_apb_wait); dmaengine_submit(dma_desc); dma_async_issue_pending(tegra_apb_dma_chan); diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index fc536f2..7553b6a 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -452,7 +452,7 @@ static int kw_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, */ if (use_irq) { /* Clear completion */ - INIT_COMPLETION(host->complete); + reinit_completion(&host->complete); /* Ack stale interrupts */ kw_write_reg(reg_isr, kw_read_reg(reg_isr)); /* Arm timeout */ @@ -717,7 +717,7 @@ static int pmu_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, return -EINVAL; } - INIT_COMPLETION(comp); + reinit_completion(&comp); req->data[0] = PMU_I2C_CMD; req->reply[0] = 0xff;
req->nbytes = sizeof(struct pmu_i2c_hdr) + 1; @@ -748,7 +748,7 @@ static int pmu_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, hdr->bus = PMU_I2C_BUS_STATUS; - INIT_COMPLETION(comp); + reinit_completion(&comp); req->data[0] = PMU_I2C_CMD; req->reply[0] = 0xff; req->nbytes = 2; diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 5f997e7..16a2552 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -106,7 +106,7 @@ static int pseries_prepare_late(void) atomic_set(&suspend_data.done, 0); atomic_set(&suspend_data.error, 0); suspend_data.complete = &suspend_work; - INIT_COMPLETION(suspend_work); + reinit_completion(&suspend_work); return 0; } diff --git a/crypto/af_alg.c b/crypto/af_alg.c index ac33d5f..966f893 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -434,7 +434,7 @@ int af_alg_wait_for_completion(int err, struct af_alg_completion *completion) case -EINPROGRESS: case -EBUSY: wait_for_completion(&completion->completion); - INIT_COMPLETION(completion->completion); + reinit_completion(&completion->completion); err = completion->err; break; }; diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 25a5934..1ab8258 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -493,7 +493,7 @@ static inline int do_one_ahash_op(struct ahash_request *req, int ret) ret = wait_for_completion_interruptible(&tr->completion); if (!ret) ret = tr->err; - INIT_COMPLETION(tr->completion); + reinit_completion(&tr->completion); } return ret; } @@ -721,7 +721,7 @@ static inline int do_one_acipher_op(struct ablkcipher_request *req, int ret) ret = wait_for_completion_interruptible(&tr->completion); if (!ret) ret = tr->err; - INIT_COMPLETION(tr->completion); + reinit_completion(&tr->completion); } return ret; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index e091ef6..432afc0 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -179,7 +179,7 @@ static int 
do_one_async_hash_op(struct ahash_request *req, ret = wait_for_completion_interruptible(&tr->completion); if (!ret) ret = tr->err; - INIT_COMPLETION(tr->completion); + reinit_completion(&tr->completion); } return ret; } @@ -336,7 +336,7 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, ret = wait_for_completion_interruptible( &tresult.completion); if (!ret && !(ret = tresult.err)) { - INIT_COMPLETION(tresult.completion); + reinit_completion(&tresult.completion); break; } /* fall through */ @@ -543,7 +543,7 @@ static int __test_aead(struct crypto_aead *tfm, int enc, ret = wait_for_completion_interruptible( &result.completion); if (!ret && !(ret = result.err)) { - INIT_COMPLETION(result.completion); + reinit_completion(&result.completion); break; } case -EBADMSG: @@ -697,7 +697,7 @@ static int __test_aead(struct crypto_aead *tfm, int enc, ret = wait_for_completion_interruptible( &result.completion); if (!ret && !(ret = result.err)) { - INIT_COMPLETION(result.completion); + reinit_completion(&result.completion); break; } case -EBADMSG: @@ -983,7 +983,7 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc, ret = wait_for_completion_interruptible( &result.completion); if (!ret && !((ret = result.err))) { - INIT_COMPLETION(result.completion); + reinit_completion(&result.completion); break; } /* fall through */ @@ -1086,7 +1086,7 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc, ret = wait_for_completion_interruptible( &result.completion); if (!ret && !((ret = result.err))) { - INIT_COMPLETION(result.completion); + reinit_completion(&result.completion); break; } /* fall through */ diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 77bbc82..92d7797 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -3017,7 +3017,7 @@ static inline void ata_eh_pull_park_action(struct ata_port *ap) * ourselves at the beginning of each pass over the loop. 
* * Additionally, all write accesses to &ap->park_req_pending - * through INIT_COMPLETION() (see below) or complete_all() + * through reinit_completion() (see below) or complete_all() * (see ata_scsi_park_store()) are protected by the host lock. * As a result we have that park_req_pending.done is zero on * exit from this function, i.e. when ATA_EH_PARK actions for @@ -3031,7 +3031,7 @@ static inline void ata_eh_pull_park_action(struct ata_port *ap) */ spin_lock_irqsave(ap->lock, flags); - INIT_COMPLETION(ap->park_req_pending); + reinit_completion(&ap->park_req_pending); ata_for_each_link(link, ap, EDGE) { ata_for_each_dev(dev, link, ALL) { struct ata_eh_info *ehi = &link->eh_info; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index ee039af..c12e9b9 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -757,7 +757,7 @@ void dpm_resume(pm_message_t state) async_error = 0; list_for_each_entry(dev, &dpm_suspended_list, power.entry) { - INIT_COMPLETION(dev->power.completion); + reinit_completion(&dev->power.completion); if (is_async(dev)) { get_device(dev); async_schedule(async_resume, dev); @@ -1237,7 +1237,7 @@ static void async_suspend(void *data, async_cookie_t cookie) static int device_suspend(struct device *dev) { - INIT_COMPLETION(dev->power.completion); + reinit_completion(&dev->power.completion); if (pm_async_enabled && dev->power.async_suspend) { get_device(dev); diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 4ff85b8..748dea4 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -343,7 +343,7 @@ static int fd_motor_on(int nr) unit[nr].motor = 1; fd_select(nr); - INIT_COMPLETION(motor_on_completion); + reinit_completion(&motor_on_completion); motor_on_timer.data = nr; mod_timer(&motor_on_timer, jiffies + HZ/2); diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 0c004ac..b35fc4f 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -2808,7 +2808,7 @@ 
resend_cmd2: /* erase the old error information */ memset(c->err_info, 0, sizeof(ErrorInfo_struct)); return_status = IO_OK; - INIT_COMPLETION(wait); + reinit_completion(&wait); goto resend_cmd2; } @@ -3669,7 +3669,7 @@ static int add_to_scan_list(struct ctlr_info *h) } } if (!found && !h->busy_scanning) { - INIT_COMPLETION(h->scan_wait); + reinit_completion(&h->scan_wait); list_add_tail(&h->scan_list, &scan_q); ret = 1; } diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c index d2120ba..73ce739 100644 --- a/drivers/char/hw_random/timeriomem-rng.c +++ b/drivers/char/hw_random/timeriomem-rng.c @@ -79,7 +79,7 @@ static int timeriomem_rng_data_read(struct hwrng *rng, u32 *data) priv->expires = cur + delay; priv->present = 0; - INIT_COMPLETION(priv->completion); + reinit_completion(&priv->completion); mod_timer(&priv->timer, priv->expires); return 4; diff --git a/drivers/crypto/tegra-aes.c b/drivers/crypto/tegra-aes.c index 2d58da9..fa05e3c 100644 --- a/drivers/crypto/tegra-aes.c +++ b/drivers/crypto/tegra-aes.c @@ -268,7 +268,7 @@ static int aes_start_crypt(struct tegra_aes_dev *dd, u32 in_addr, u32 out_addr, aes_writel(dd, value, TEGRA_AES_SECURE_INPUT_SELECT); aes_writel(dd, out_addr, TEGRA_AES_SECURE_DEST_ADDR); - INIT_COMPLETION(dd->op_complete); + reinit_completion(&dd->op_complete); for (i = 0; i < AES_HW_MAX_ICQ_LENGTH - 1; i++) { do { diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index e5af0e3..0e79951 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -477,7 +477,7 @@ void fw_send_phy_config(struct fw_card *card, phy_config_packet.header[1] = data; phy_config_packet.header[2] = ~data; phy_config_packet.generation = generation; - INIT_COMPLETION(phy_config_done); + reinit_completion(&phy_config_done); card->driver->send_request(card, &phy_config_packet); wait_for_completion_timeout(&phy_config_done, timeout); diff --git 
a/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c b/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c index 1eb86c795..e281070 100644 --- a/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c +++ b/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c @@ -99,7 +99,7 @@ static int xfer_read(struct i2c_adapter *adap, struct i2c_msg *pmsg) i2c_dev->status = I2C_STAT_INIT; i2c_dev->msg = pmsg; i2c_dev->buf_offset = 0; - INIT_COMPLETION(i2c_dev->complete); + reinit_completion(&i2c_dev->complete); /* Enable I2C transaction */ temp = ((pmsg->len) << 20) | HI2C_EDID_READ | HI2C_ENABLE_TRANSACTION; diff --git a/drivers/hid/hid-wiimote.h b/drivers/hid/hid-wiimote.h index 75db0c4..cfa63b0 100644 --- a/drivers/hid/hid-wiimote.h +++ b/drivers/hid/hid-wiimote.h @@ -327,7 +327,7 @@ static inline void wiimote_cmd_acquire_noint(struct wiimote_data *wdata) static inline void wiimote_cmd_set(struct wiimote_data *wdata, int cmd, __u32 opt) { - INIT_COMPLETION(wdata->state.ready); + reinit_completion(&wdata->state.ready); wdata->state.cmd = cmd; wdata->state.opt = opt; } diff --git a/drivers/hwmon/jz4740-hwmon.c b/drivers/hwmon/jz4740-hwmon.c index e0d66b9..a183e48 100644 --- a/drivers/hwmon/jz4740-hwmon.c +++ b/drivers/hwmon/jz4740-hwmon.c @@ -66,7 +66,7 @@ static ssize_t jz4740_hwmon_read_adcin(struct device *dev, mutex_lock(&hwmon->lock); - INIT_COMPLETION(*completion); + reinit_completion(completion); enable_irq(hwmon->irq); hwmon->cell->enable(to_platform_device(dev)); diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c index fd05930..8edba9d 100644 --- a/drivers/i2c/busses/i2c-at91.c +++ b/drivers/i2c/busses/i2c-at91.c @@ -371,7 +371,7 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev) dev_dbg(dev->dev, "transfer: %s %d bytes.\n", (dev->msg->flags & I2C_M_RD) ? 
"read" : "write", dev->buf_len); - INIT_COMPLETION(dev->cmd_complete); + reinit_completion(&dev->cmd_complete); dev->transfer_status = 0; if (!dev->buf_len) { diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c index ea4b08f..d7e8600 100644 --- a/drivers/i2c/busses/i2c-bcm2835.c +++ b/drivers/i2c/busses/i2c-bcm2835.c @@ -151,7 +151,7 @@ static int bcm2835_i2c_xfer_msg(struct bcm2835_i2c_dev *i2c_dev, i2c_dev->msg_buf = msg->buf; i2c_dev->msg_buf_remaining = msg->len; - INIT_COMPLETION(i2c_dev->completion); + reinit_completion(&i2c_dev->completion); bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_C, BCM2835_I2C_C_CLEAR); diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c index 132369f..960dec6 100644 --- a/drivers/i2c/busses/i2c-davinci.c +++ b/drivers/i2c/busses/i2c-davinci.c @@ -323,7 +323,7 @@ i2c_davinci_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, int stop) davinci_i2c_write_reg(dev, DAVINCI_I2C_CNT_REG, dev->buf_len); - INIT_COMPLETION(dev->cmd_complete); + reinit_completion(&dev->cmd_complete); dev->cmd_err = 0; /* Take I2C out of reset and configure it as master */ diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c index 5888fee..e89e3e2 100644 --- a/drivers/i2c/busses/i2c-designware-core.c +++ b/drivers/i2c/busses/i2c-designware-core.c @@ -613,7 +613,7 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) mutex_lock(&dev->lock); pm_runtime_get_sync(dev->dev); - INIT_COMPLETION(dev->cmd_complete); + reinit_completion(&dev->cmd_complete); dev->msgs = msgs; dev->msgs_num = num; dev->cmd_err = 0; diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index 1672eff..0043ede 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -541,7 +541,7 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr, desc->dptr_high = upper_32_bits(dma_addr); } - INIT_COMPLETION(priv->cmp); + 
reinit_completion(&priv->cmp); /* Add the descriptor */ ismt_submit_desc(priv); diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c index b7c8577..3aedd86 100644 --- a/drivers/i2c/busses/i2c-mxs.c +++ b/drivers/i2c/busses/i2c-mxs.c @@ -505,7 +505,7 @@ static int mxs_i2c_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, return err; } } else { - INIT_COMPLETION(i2c->cmd_complete); + reinit_completion(&i2c->cmd_complete); ret = mxs_i2c_dma_setup_xfer(adap, msg, flags); if (ret) return ret; diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 9967a6f..a6a891d 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -543,7 +543,7 @@ static int omap_i2c_xfer_msg(struct i2c_adapter *adap, w |= OMAP_I2C_BUF_RXFIF_CLR | OMAP_I2C_BUF_TXFIF_CLR; omap_i2c_write_reg(dev, OMAP_I2C_BUF_REG, w); - INIT_COMPLETION(dev->cmd_complete); + reinit_completion(&dev->cmd_complete); dev->cmd_err = 0; w = OMAP_I2C_CON_EN | OMAP_I2C_CON_MST | OMAP_I2C_CON_STT; diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index c457cb4..e661ede 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -544,7 +544,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, i2c_dev->msg_buf_remaining = msg->len; i2c_dev->msg_err = I2C_ERR_NONE; i2c_dev->msg_read = (msg->flags & I2C_M_RD); - INIT_COMPLETION(i2c_dev->msg_complete); + reinit_completion(&i2c_dev->msg_complete); packet_header = (0 << PACKET_HEADER0_HEADER_SIZE_SHIFT) | PACKET_HEADER0_PROTOCOL_I2C | diff --git a/drivers/i2c/busses/i2c-wmt.c b/drivers/i2c/busses/i2c-wmt.c index c65da3d..31395fa 100644 --- a/drivers/i2c/busses/i2c-wmt.c +++ b/drivers/i2c/busses/i2c-wmt.c @@ -158,7 +158,7 @@ static int wmt_i2c_write(struct i2c_adapter *adap, struct i2c_msg *pmsg, writew(val, i2c_dev->base + REG_CR); } - INIT_COMPLETION(i2c_dev->complete); + reinit_completion(&i2c_dev->complete); if (i2c_dev->mode == 
I2C_MODE_STANDARD) tcr_val = TCR_STANDARD_MODE; @@ -247,7 +247,7 @@ static int wmt_i2c_read(struct i2c_adapter *adap, struct i2c_msg *pmsg, writew(val, i2c_dev->base + REG_CR); } - INIT_COMPLETION(i2c_dev->complete); + reinit_completion(&i2c_dev->complete); if (i2c_dev->mode == I2C_MODE_STANDARD) tcr_val = TCR_STANDARD_MODE; diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index e6fbd3e..9a4e0e3 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -188,7 +188,7 @@ static int ad_sd_calibrate(struct ad_sigma_delta *sigma_delta, spi_bus_lock(sigma_delta->spi->master); sigma_delta->bus_locked = true; - INIT_COMPLETION(sigma_delta->completion); + reinit_completion(&sigma_delta->completion); ret = ad_sigma_delta_set_mode(sigma_delta, mode); if (ret < 0) @@ -259,7 +259,7 @@ int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, spi_bus_lock(sigma_delta->spi->master); sigma_delta->bus_locked = true; - INIT_COMPLETION(sigma_delta->completion); + reinit_completion(&sigma_delta->completion); ad_sigma_delta_set_mode(sigma_delta, AD_SD_MODE_SINGLE); @@ -343,7 +343,7 @@ static int ad_sd_buffer_postdisable(struct iio_dev *indio_dev) { struct ad_sigma_delta *sigma_delta = iio_device_get_drvdata(indio_dev); - INIT_COMPLETION(sigma_delta->completion); + reinit_completion(&sigma_delta->completion); wait_for_completion_timeout(&sigma_delta->completion, HZ); if (!sigma_delta->irq_dis) { diff --git a/drivers/iio/adc/nau7802.c b/drivers/iio/adc/nau7802.c index 54c5bab..e525aa6 100644 --- a/drivers/iio/adc/nau7802.c +++ b/drivers/iio/adc/nau7802.c @@ -190,7 +190,7 @@ static int nau7802_read_irq(struct iio_dev *indio_dev, struct nau7802_state *st = iio_priv(indio_dev); int ret; - INIT_COMPLETION(st->value_ok); + reinit_completion(&st->value_ok); enable_irq(st->client->irq); nau7802_sync(st); diff --git a/drivers/input/touchscreen/cyttsp_core.c b/drivers/input/touchscreen/cyttsp_core.c index d53e0b7..4204841 100644 
--- a/drivers/input/touchscreen/cyttsp_core.c +++ b/drivers/input/touchscreen/cyttsp_core.c @@ -242,7 +242,7 @@ static int cyttsp_soft_reset(struct cyttsp *ts) int retval; /* wait for interrupt to set ready completion */ - INIT_COMPLETION(ts->bl_ready); + reinit_completion(&ts->bl_ready); ts->state = CY_BL_STATE; enable_irq(ts->irq); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 50ea7ed..81b0fa6 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -950,7 +950,7 @@ static int crypt_convert(struct crypt_config *cc, /* async */ case -EBUSY: wait_for_completion(&ctx->restart); - INIT_COMPLETION(ctx->restart); + reinit_completion(&ctx->restart); /* fall through*/ case -EINPROGRESS: this_cc->req = NULL; diff --git a/drivers/media/platform/blackfin/bfin_capture.c b/drivers/media/platform/blackfin/bfin_capture.c index 4c11059..2819165 100644 --- a/drivers/media/platform/blackfin/bfin_capture.c +++ b/drivers/media/platform/blackfin/bfin_capture.c @@ -422,7 +422,7 @@ static int bcap_start_streaming(struct vb2_queue *vq, unsigned int count) return ret; } - INIT_COMPLETION(bcap_dev->comp); + reinit_completion(&bcap_dev->comp); bcap_dev->stop = false; return 0; } diff --git a/drivers/media/radio/radio-wl1273.c b/drivers/media/radio/radio-wl1273.c index 97c2c18..9cf6731 100644 --- a/drivers/media/radio/radio-wl1273.c +++ b/drivers/media/radio/radio-wl1273.c @@ -375,7 +375,7 @@ static int wl1273_fm_set_tx_freq(struct wl1273_device *radio, unsigned int freq) if (r) return r; - INIT_COMPLETION(radio->busy); + reinit_completion(&radio->busy); /* wait for the FR IRQ */ r = wait_for_completion_timeout(&radio->busy, msecs_to_jiffies(2000)); @@ -389,7 +389,7 @@ static int wl1273_fm_set_tx_freq(struct wl1273_device *radio, unsigned int freq) if (r) return r; - INIT_COMPLETION(radio->busy); + reinit_completion(&radio->busy); /* wait for the POWER_ENB IRQ */ r = wait_for_completion_timeout(&radio->busy, msecs_to_jiffies(1000)); @@ -444,7 +444,7 @@ static int 
wl1273_fm_set_rx_freq(struct wl1273_device *radio, unsigned int freq) goto err; } - INIT_COMPLETION(radio->busy); + reinit_completion(&radio->busy); r = wait_for_completion_timeout(&radio->busy, msecs_to_jiffies(2000)); if (!r) { @@ -805,7 +805,7 @@ static int wl1273_fm_set_seek(struct wl1273_device *radio, if (level < SCHAR_MIN || level > SCHAR_MAX) return -EINVAL; - INIT_COMPLETION(radio->busy); + reinit_completion(&radio->busy); dev_dbg(radio->dev, "%s: BUSY\n", __func__); r = core->write(core, WL1273_INT_MASK_SET, radio->irq_flags); @@ -847,7 +847,7 @@ static int wl1273_fm_set_seek(struct wl1273_device *radio, if (r) goto out; - INIT_COMPLETION(radio->busy); + reinit_completion(&radio->busy); dev_dbg(radio->dev, "%s: BUSY\n", __func__); r = core->write(core, WL1273_TUNER_MODE_SET, TUNER_MODE_AUTO_SEEK); diff --git a/drivers/media/radio/si470x/radio-si470x-common.c b/drivers/media/radio/si470x/radio-si470x-common.c index 5c57e5b..0bd2500 100644 --- a/drivers/media/radio/si470x/radio-si470x-common.c +++ b/drivers/media/radio/si470x/radio-si470x-common.c @@ -218,7 +218,7 @@ static int si470x_set_chan(struct si470x_device *radio, unsigned short chan) goto done; /* wait till tune operation has completed */ - INIT_COMPLETION(radio->completion); + reinit_completion(&radio->completion); retval = wait_for_completion_timeout(&radio->completion, msecs_to_jiffies(tune_timeout)); if (!retval) @@ -341,7 +341,7 @@ static int si470x_set_seek(struct si470x_device *radio, return retval; /* wait till tune operation has completed */ - INIT_COMPLETION(radio->completion); + reinit_completion(&radio->completion); retval = wait_for_completion_timeout(&radio->completion, msecs_to_jiffies(seek_timeout)); if (!retval) diff --git a/drivers/media/rc/iguanair.c b/drivers/media/rc/iguanair.c index 19632b1..b53626b 100644 --- a/drivers/media/rc/iguanair.c +++ b/drivers/media/rc/iguanair.c @@ -207,7 +207,7 @@ static int iguanair_send(struct iguanair *ir, unsigned size) { int rc; - 
INIT_COMPLETION(ir->completion); + reinit_completion(&ir->completion); ir->urb_out->transfer_buffer_length = size; rc = usb_submit_urb(ir->urb_out, GFP_KERNEL); diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index bbf4aea..a0547db 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -253,7 +253,7 @@ void memstick_new_req(struct memstick_host *host) { if (host->card) { host->retries = cmd_retries; - INIT_COMPLETION(host->card->mrq_complete); + reinit_completion(&host->card->mrq_complete); host->request(host); } } diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c index 1b6e913..31727bf 100644 --- a/drivers/memstick/host/r592.c +++ b/drivers/memstick/host/r592.c @@ -290,7 +290,7 @@ static int r592_transfer_fifo_dma(struct r592_device *dev) dbg_verbose("doing dma transfer"); dev->dma_error = 0; - INIT_COMPLETION(dev->dma_done); + reinit_completion(&dev->dma_done); /* TODO: hidden assumption about nenth beeing always 1 */ sg_count = dma_map_sg(&dev->pci_dev->dev, &dev->req->sg, 1, is_write ? 
diff --git a/drivers/misc/mic/card/mic_virtio.c b/drivers/misc/mic/card/mic_virtio.c index 914cc9b..8aa42e7 100644 --- a/drivers/misc/mic/card/mic_virtio.c +++ b/drivers/misc/mic/card/mic_virtio.c @@ -493,7 +493,7 @@ static int mic_remove_device(struct mic_device_desc __iomem *d, ioread8(&dc->config_change), ioread8(&d->type), mvdev); status = ioread8(&d->status); - INIT_COMPLETION(mvdev->reset_done); + reinit_completion(&mvdev->reset_done); unregister_virtio_device(&mvdev->vdev); mic_free_card_irq(mvdev->virtio_cookie, mvdev); if (status & VIRTIO_CONFIG_S_DRIVER_OK) diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c index b079c65..7558d91 100644 --- a/drivers/misc/mic/host/mic_boot.c +++ b/drivers/misc/mic/host/mic_boot.c @@ -38,7 +38,7 @@ static void mic_reset(struct mic_device *mdev) #define MIC_RESET_TO (45) - INIT_COMPLETION(mdev->reset_wait); + reinit_completion(&mdev->reset_wait); mdev->ops->reset_fw_ready(mdev); mdev->ops->reset(mdev); diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c index 83907c7..96853a0 100644 --- a/drivers/misc/ti-st/st_kim.c +++ b/drivers/misc/ti-st/st_kim.c @@ -218,7 +218,7 @@ static long read_local_version(struct kim_data_s *kim_gdata, char *bts_scr_name) pr_debug("%s", __func__); - INIT_COMPLETION(kim_gdata->kim_rcvd); + reinit_completion(&kim_gdata->kim_rcvd); if (4 != st_int_write(kim_gdata->core_data, read_ver_cmd, 4)) { pr_err("kim: couldn't write 4 bytes"); return -EIO; @@ -229,7 +229,7 @@ static long read_local_version(struct kim_data_s *kim_gdata, char *bts_scr_name) pr_err(" waiting for ver info- timed out "); return -ETIMEDOUT; } - INIT_COMPLETION(kim_gdata->kim_rcvd); + reinit_completion(&kim_gdata->kim_rcvd); /* the positions 12 & 13 in the response buffer provide with the * chip, major & minor numbers */ @@ -362,7 +362,7 @@ static long download_firmware(struct kim_data_s *kim_gdata) /* reinit completion before sending for the * relevant wait */ - 
INIT_COMPLETION(kim_gdata->kim_rcvd); + reinit_completion(&kim_gdata->kim_rcvd); /* * Free space found in uart buffer, call st_int_write @@ -398,7 +398,7 @@ static long download_firmware(struct kim_data_s *kim_gdata) release_firmware(kim_gdata->fw_entry); return -ETIMEDOUT; } - INIT_COMPLETION(kim_gdata->kim_rcvd); + reinit_completion(&kim_gdata->kim_rcvd); break; case ACTION_DELAY: /* sleep */ pr_info("sleep command in scr"); @@ -474,7 +474,7 @@ long st_kim_start(void *kim_data) gpio_set_value(kim_gdata->nshutdown, GPIO_HIGH); mdelay(100); /* re-initialize the completion */ - INIT_COMPLETION(kim_gdata->ldisc_installed); + reinit_completion(&kim_gdata->ldisc_installed); /* send notification to UIM */ kim_gdata->ldisc_install = 1; pr_info("ldisc_install = 1"); @@ -525,7 +525,7 @@ long st_kim_stop(void *kim_data) kim_gdata->kim_pdev->dev.platform_data; struct tty_struct *tty = kim_gdata->core_data->tty; - INIT_COMPLETION(kim_gdata->ldisc_installed); + reinit_completion(&kim_gdata->ldisc_installed); if (tty) { /* can be called before ldisc is installed */ /* Flush any pending characters in the driver and discipline. 
*/ diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c index 4edea7f..9dfdb06 100644 --- a/drivers/mtd/nand/mxc_nand.c +++ b/drivers/mtd/nand/mxc_nand.c @@ -396,7 +396,7 @@ static void wait_op_done(struct mxc_nand_host *host, int useirq) if (useirq) { if (!host->devtype_data->check_int(host)) { - INIT_COMPLETION(host->op_completion); + reinit_completion(&host->op_completion); irq_control(host, 1); wait_for_completion(&host->op_completion); } diff --git a/drivers/mtd/nand/r852.c b/drivers/mtd/nand/r852.c index 9dcf02d..325930d 100644 --- a/drivers/mtd/nand/r852.c +++ b/drivers/mtd/nand/r852.c @@ -181,7 +181,7 @@ static void r852_do_dma(struct r852_device *dev, uint8_t *buf, int do_read) /* Set dma direction */ dev->dma_dir = do_read; dev->dma_stage = 1; - INIT_COMPLETION(dev->dma_done); + reinit_completion(&dev->dma_done); dbg_verbose("doing dma %s ", do_read ? "read" : "write"); diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index 2362909..6547c84 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -159,7 +159,7 @@ static int omap2_onenand_wait(struct mtd_info *mtd, int state) syscfg = read_reg(c, ONENAND_REG_SYS_CFG1); } - INIT_COMPLETION(c->irq_done); + reinit_completion(&c->irq_done); if (c->gpio_irq) { result = gpio_get_value(c->gpio_irq); if (result == -1) { @@ -349,7 +349,7 @@ static int omap3_onenand_read_bufferram(struct mtd_info *mtd, int area, omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, dma_dst, 0, 0); - INIT_COMPLETION(c->dma_done); + reinit_completion(&c->dma_done); omap_start_dma(c->dma_channel); timeout = jiffies + msecs_to_jiffies(20); @@ -420,7 +420,7 @@ static int omap3_onenand_write_bufferram(struct mtd_info *mtd, int area, omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, dma_dst, 0, 0); - INIT_COMPLETION(c->dma_done); + reinit_completion(&c->dma_done); omap_start_dma(c->dma_channel); timeout = jiffies + msecs_to_jiffies(20); @@ -499,7 
+499,7 @@ static int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area, omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, dma_dst, 0, 0); - INIT_COMPLETION(c->dma_done); + reinit_completion(&c->dma_done); omap_start_dma(c->dma_channel); wait_for_completion(&c->dma_done); @@ -544,7 +544,7 @@ static int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area, omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, dma_dst, 0, 0); - INIT_COMPLETION(c->dma_done); + reinit_completion(&c->dma_done); omap_start_dma(c->dma_channel); wait_for_completion(&c->dma_done); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index 09810dd..a01a6a7 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -3537,7 +3537,7 @@ int qlcnic_83xx_resume(struct qlcnic_adapter *adapter) void qlcnic_83xx_reinit_mbx_work(struct qlcnic_mailbox *mbx) { - INIT_COMPLETION(mbx->completion); + reinit_completion(&mbx->completion); set_bit(QLC_83XX_MBX_READY, &mbx->status); } diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 6f10b49..2cbe1c2 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -561,7 +561,7 @@ at86rf230_xmit(struct ieee802154_dev *dev, struct sk_buff *skb) spin_lock_irqsave(&lp->lock, flags); lp->is_tx = 1; - INIT_COMPLETION(lp->tx_complete); + reinit_completion(&lp->tx_complete); spin_unlock_irqrestore(&lp->lock, flags); rc = at86rf230_write_fbuf(lp, skb->data, skb->len); diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c index 0632d34..c6e46d6 100644 --- a/drivers/net/ieee802154/mrf24j40.c +++ b/drivers/net/ieee802154/mrf24j40.c @@ -343,7 +343,7 @@ static int mrf24j40_tx(struct ieee802154_dev *dev, struct sk_buff *skb) if (ret) goto err; - INIT_COMPLETION(devrec->tx_complete); + 
reinit_completion(&devrec->tx_complete); /* Set TXNTRIG bit of TXNCON to send packet */ ret = read_short_reg(devrec, REG_TXNCON, &val); diff --git a/drivers/net/wireless/ath/ath10k/htc.c b/drivers/net/wireless/ath/ath10k/htc.c index 3118d75..edae50b 100644 --- a/drivers/net/wireless/ath/ath10k/htc.c +++ b/drivers/net/wireless/ath/ath10k/htc.c @@ -534,7 +534,7 @@ int ath10k_htc_wait_target(struct ath10k_htc *htc) u16 credit_count; u16 credit_size; - INIT_COMPLETION(htc->ctl_resp); + reinit_completion(&htc->ctl_resp); status = ath10k_hif_start(htc->ar); if (status) { @@ -669,7 +669,7 @@ int ath10k_htc_connect_service(struct ath10k_htc *htc, req_msg->flags = __cpu_to_le16(flags); req_msg->service_id = __cpu_to_le16(conn_req->service_id); - INIT_COMPLETION(htc->ctl_resp); + reinit_completion(&htc->ctl_resp); status = ath10k_htc_send(htc, ATH10K_HTC_EP_0, skb); if (status) { diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 0b1cc51..97ac8c8 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -92,7 +92,7 @@ static int ath10k_install_key(struct ath10k_vif *arvif, lockdep_assert_held(&ar->conf_mutex); - INIT_COMPLETION(ar->install_key_done); + reinit_completion(&ar->install_key_done); ret = ath10k_send_key(arvif, key, cmd, macaddr); if (ret) @@ -438,7 +438,7 @@ static int ath10k_vdev_start(struct ath10k_vif *arvif) lockdep_assert_held(&ar->conf_mutex); - INIT_COMPLETION(ar->vdev_setup_done); + reinit_completion(&ar->vdev_setup_done); arg.vdev_id = arvif->vdev_id; arg.dtim_period = arvif->dtim_period; @@ -491,7 +491,7 @@ static int ath10k_vdev_stop(struct ath10k_vif *arvif) lockdep_assert_held(&ar->conf_mutex); - INIT_COMPLETION(ar->vdev_setup_done); + reinit_completion(&ar->vdev_setup_done); ret = ath10k_wmi_vdev_stop(ar, arvif->vdev_id); if (ret) { @@ -1666,7 +1666,7 @@ void ath10k_offchan_tx_work(struct work_struct *work) } spin_lock_bh(&ar->data_lock); - 
INIT_COMPLETION(ar->offchan_tx_completed); + reinit_completion(&ar->offchan_tx_completed); ar->offchan_tx_skb = skb; spin_unlock_bh(&ar->data_lock); @@ -2476,8 +2476,8 @@ static int ath10k_hw_scan(struct ieee80211_hw *hw, goto exit; } - INIT_COMPLETION(ar->scan.started); - INIT_COMPLETION(ar->scan.completed); + reinit_completion(&ar->scan.started); + reinit_completion(&ar->scan.completed); ar->scan.in_progress = true; ar->scan.aborting = false; ar->scan.is_roc = false; @@ -2832,9 +2832,9 @@ static int ath10k_remain_on_channel(struct ieee80211_hw *hw, goto exit; } - INIT_COMPLETION(ar->scan.started); - INIT_COMPLETION(ar->scan.completed); - INIT_COMPLETION(ar->scan.on_channel); + reinit_completion(&ar->scan.started); + reinit_completion(&ar->scan.completed); + reinit_completion(&ar->scan.on_channel); ar->scan.in_progress = true; ar->scan.aborting = false; ar->scan.is_roc = true; diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c index 307bc0d..ca115f3 100644 --- a/drivers/net/wireless/ath/carl9170/usb.c +++ b/drivers/net/wireless/ath/carl9170/usb.c @@ -773,7 +773,7 @@ void carl9170_usb_stop(struct ar9170 *ar) complete_all(&ar->cmd_wait); /* This is required to prevent an early completion on _start */ - INIT_COMPLETION(ar->cmd_wait); + reinit_completion(&ar->cmd_wait); /* * Note: diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index 0a2844c..fd30cdd 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -250,7 +250,7 @@ int wil_reset(struct wil6210_priv *wil) /* init after reset */ wil->pending_connect_cid = -1; - INIT_COMPLETION(wil->wmi_ready); + reinit_completion(&wil->wmi_ready); /* TODO: release MAC reset */ wil6210_enable_irq(wil); diff --git a/drivers/net/wireless/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/brcm80211/brcmfmac/p2p.c index d7a9745..5b5b952 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/p2p.c +++ 
b/drivers/net/wireless/brcm80211/brcmfmac/p2p.c @@ -1148,7 +1148,7 @@ static s32 brcmf_p2p_af_searching_channel(struct brcmf_p2p_info *p2p) pri_vif = p2p->bss_idx[P2PAPI_BSSCFG_PRIMARY].vif; - INIT_COMPLETION(afx_hdl->act_frm_scan); + reinit_completion(&afx_hdl->act_frm_scan); set_bit(BRCMF_P2P_STATUS_FINDING_COMMON_CHANNEL, &p2p->status); afx_hdl->is_active = true; afx_hdl->peer_chan = P2P_INVALID_CHANNEL; @@ -1501,7 +1501,7 @@ static s32 brcmf_p2p_tx_action_frame(struct brcmf_p2p_info *p2p, brcmf_dbg(TRACE, "Enter\n"); - INIT_COMPLETION(p2p->send_af_done); + reinit_completion(&p2p->send_af_done); clear_bit(BRCMF_P2P_STATUS_ACTION_TX_COMPLETED, &p2p->status); clear_bit(BRCMF_P2P_STATUS_ACTION_TX_NOACK, &p2p->status); diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c index 7ef0b4a..84d94f5 100644 --- a/drivers/net/wireless/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zd1211rw/zd_usb.c @@ -1619,7 +1619,7 @@ static void prepare_read_regs_int(struct zd_usb *usb, atomic_set(&intr->read_regs_enabled, 1); intr->read_regs.req = req; intr->read_regs.req_count = count; - INIT_COMPLETION(intr->read_regs.completion); + reinit_completion(&intr->read_regs.completion); spin_unlock_irq(&intr->lock); } diff --git a/drivers/parport/parport_ip32.c b/drivers/parport/parport_ip32.c index d471627..c864f82 100644 --- a/drivers/parport/parport_ip32.c +++ b/drivers/parport/parport_ip32.c @@ -1331,7 +1331,7 @@ static unsigned int parport_ip32_fwp_wait_interrupt(struct parport *p) break; /* Initialize mutex used to take interrupts into account */ - INIT_COMPLETION(priv->irq_complete); + reinit_completion(&priv->irq_complete); /* Enable serviceIntr */ parport_ip32_frob_econtrol(p, ECR_SERVINTR, 0); @@ -1446,7 +1446,7 @@ static size_t parport_ip32_fifo_write_block_dma(struct parport *p, priv->irq_mode = PARPORT_IP32_IRQ_HERE; parport_ip32_dma_start(DMA_TO_DEVICE, (void *)buf, len); - INIT_COMPLETION(priv->irq_complete); + 
reinit_completion(&priv->irq_complete); parport_ip32_frob_econtrol(p, ECR_DMAEN | ECR_SERVINTR, ECR_DMAEN); nfault_timeout = min((unsigned long)physport->cad->timeout, diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c index 8eea2ef..605a9be 100644 --- a/drivers/platform/x86/apple-gmux.c +++ b/drivers/platform/x86/apple-gmux.c @@ -289,7 +289,7 @@ static int gmux_switchto(enum vga_switcheroo_client_id id) static int gmux_set_discrete_state(struct apple_gmux_data *gmux_data, enum vga_switcheroo_state state) { - INIT_COMPLETION(gmux_data->powerchange_done); + reinit_completion(&gmux_data->powerchange_done); if (state == VGA_SWITCHEROO_ON) { gmux_write8(gmux_data, GMUX_PORT_DISCRETE_POWER, 1); diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c index 7549707..3cb4178 100644 --- a/drivers/power/ab8500_fg.c +++ b/drivers/power/ab8500_fg.c @@ -574,8 +574,8 @@ int ab8500_fg_inst_curr_start(struct ab8500_fg *di) } /* Return and WFI */ - INIT_COMPLETION(di->ab8500_fg_started); - INIT_COMPLETION(di->ab8500_fg_complete); + reinit_completion(&di->ab8500_fg_started); + reinit_completion(&di->ab8500_fg_complete); enable_irq(di->irq); /* Note: cc_lock is still locked */ diff --git a/drivers/power/jz4740-battery.c b/drivers/power/jz4740-battery.c index d9686aa..6c8931d 100644 --- a/drivers/power/jz4740-battery.c +++ b/drivers/power/jz4740-battery.c @@ -73,7 +73,7 @@ static long jz_battery_read_voltage(struct jz_battery *battery) mutex_lock(&battery->lock); - INIT_COMPLETION(battery->read_completion); + reinit_completion(&battery->read_completion); enable_irq(battery->irq); battery->cell->enable(battery->pdev); diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c index a34e5cf..965a9da 100644 --- a/drivers/rtc/rtc-hid-sensor-time.c +++ b/drivers/rtc/rtc-hid-sensor-time.c @@ -209,7 +209,7 @@ static int hid_rtc_read_time(struct device *dev, struct rtc_time *tm) platform_get_drvdata(to_platform_device(dev)); 
int ret; - INIT_COMPLETION(time_state->comp_last_time); + reinit_completion(&time_state->comp_last_time); /* get a report with all values through requesting one value */ sensor_hub_input_attr_get_raw_value(time_state->common_attributes.hsdev, HID_USAGE_SENSOR_TIME, hid_time_addresses[0], diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index 4c332143..3ed666f 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -217,7 +217,7 @@ static int bcm2835_spi_start_transfer(struct spi_device *spi, cs |= spi->chip_select; } - INIT_COMPLETION(bs->done); + reinit_completion(&bs->done); bs->tx_buf = tfr->tx_buf; bs->rx_buf = tfr->rx_buf; bs->len = tfr->len; diff --git a/drivers/spi/spi-clps711x.c b/drivers/spi/spi-clps711x.c index e2a5a42..6f03d7e 100644 --- a/drivers/spi/spi-clps711x.c +++ b/drivers/spi/spi-clps711x.c @@ -105,7 +105,7 @@ static int spi_clps711x_transfer_one_message(struct spi_master *master, gpio_set_value(cs, !!(msg->spi->mode & SPI_CS_HIGH)); - INIT_COMPLETION(hw->done); + reinit_completion(&hw->done); hw->count = 0; hw->len = xfer->len; diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c index dd72445..50b2d88 100644 --- a/drivers/spi/spi-davinci.c +++ b/drivers/spi/spi-davinci.c @@ -554,7 +554,7 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t) clear_io_bits(dspi->base + SPIGCR1, SPIGCR1_POWERDOWN_MASK); set_io_bits(dspi->base + SPIGCR1, SPIGCR1_SPIENA_MASK); - INIT_COMPLETION(dspi->done); + reinit_completion(&dspi->done); if (spicfg->io_type == SPI_IO_TYPE_INTR) set_io_bits(dspi->base + SPIINT, SPIINT_MASKINT); diff --git a/drivers/spi/spi-fsl-espi.c b/drivers/spi/spi-fsl-espi.c index 32200d4..80d8f40 100644 --- a/drivers/spi/spi-fsl-espi.c +++ b/drivers/spi/spi-fsl-espi.c @@ -232,7 +232,7 @@ static int fsl_espi_bufs(struct spi_device *spi, struct spi_transfer *t) mpc8xxx_spi->tx = t->tx_buf; mpc8xxx_spi->rx = t->rx_buf; - INIT_COMPLETION(mpc8xxx_spi->done); + 
reinit_completion(&mpc8xxx_spi->done); /* Set SPCOM[CS] and SPCOM[TRANLEN] field */ if ((t->len - 1) > SPCOM_TRANLEN_MAX) { diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c index 2129fcd..119f7af 100644 --- a/drivers/spi/spi-fsl-spi.c +++ b/drivers/spi/spi-fsl-spi.c @@ -339,7 +339,7 @@ static int fsl_spi_bufs(struct spi_device *spi, struct spi_transfer *t, mpc8xxx_spi->tx = t->tx_buf; mpc8xxx_spi->rx = t->rx_buf; - INIT_COMPLETION(mpc8xxx_spi->done); + reinit_completion(&mpc8xxx_spi->done); if (mpc8xxx_spi->flags & SPI_CPM_MODE) ret = fsl_spi_cpm_bufs(mpc8xxx_spi, t, is_dma_mapped); diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c index 58d5ee0..9602bbd 100644 --- a/drivers/spi/spi-mpc512x-psc.c +++ b/drivers/spi/spi-mpc512x-psc.c @@ -167,7 +167,7 @@ static int mpc512x_psc_spi_transfer_rxtx(struct spi_device *spi, } /* have the ISR trigger when the TX FIFO is empty */ - INIT_COMPLETION(mps->txisrdone); + reinit_completion(&mps->txisrdone); out_be32(&fifo->txisr, MPC512x_PSC_FIFO_EMPTY); out_be32(&fifo->tximr, MPC512x_PSC_FIFO_EMPTY); wait_for_completion(&mps->txisrdone); diff --git a/drivers/spi/spi-mxs.c b/drivers/spi/spi-mxs.c index de33305..73afb56 100644 --- a/drivers/spi/spi-mxs.c +++ b/drivers/spi/spi-mxs.c @@ -202,7 +202,7 @@ static int mxs_spi_txrx_dma(struct mxs_spi *spi, if (!dma_xfer) return -ENOMEM; - INIT_COMPLETION(spi->c); + reinit_completion(&spi->c); /* Chip select was already programmed into CTRL0 */ ctrl0 = readl(ssp->base + HW_SSP_CTRL0); diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c index 9e2020d..4c4b0a1 100644 --- a/drivers/spi/spi-s3c64xx.c +++ b/drivers/spi/spi-s3c64xx.c @@ -890,7 +890,7 @@ static int s3c64xx_spi_transfer_one(struct spi_master *master, unsigned long flags; int use_dma; - INIT_COMPLETION(sdd->xfer_completion); + reinit_completion(&sdd->xfer_completion); /* Only BPW and Speed may change across transfers */ bpw = xfer->bits_per_word; diff --git 
a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 2a95435..c74298c 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -465,7 +465,7 @@ static int sh_msiof_spi_txrx_once(struct sh_msiof_spi_priv *p, ret = ret ? ret : sh_msiof_modify_ctr_wait(p, 0, CTR_TXE); /* start by setting frame bit */ - INIT_COMPLETION(p->done); + reinit_completion(&p->done); ret = ret ? ret : sh_msiof_modify_ctr_wait(p, 0, CTR_TFSE); if (ret) { dev_err(&p->pdev->dev, "failed to start hardware\n"); diff --git a/drivers/spi/spi-sirf.c b/drivers/spi/spi-sirf.c index 592b4af..ed5e501 100644 --- a/drivers/spi/spi-sirf.c +++ b/drivers/spi/spi-sirf.c @@ -305,8 +305,8 @@ static int spi_sirfsoc_transfer(struct spi_device *spi, struct spi_transfer *t) sspi->tx = t->tx_buf ? t->tx_buf : sspi->dummypage; sspi->rx = t->rx_buf ? t->rx_buf : sspi->dummypage; sspi->left_tx_word = sspi->left_rx_word = t->len / sspi->word_width; - INIT_COMPLETION(sspi->rx_done); - INIT_COMPLETION(sspi->tx_done); + reinit_completion(&sspi->rx_done); + reinit_completion(&sspi->tx_done); writel(SIRFSOC_SPI_INT_MASK_ALL, sspi->base + SIRFSOC_SPI_INT_STATUS); diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index 9146bb3..aaecfb3 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -451,7 +451,7 @@ static void tegra_spi_dma_complete(void *args) static int tegra_spi_start_tx_dma(struct tegra_spi_data *tspi, int len) { - INIT_COMPLETION(tspi->tx_dma_complete); + reinit_completion(&tspi->tx_dma_complete); tspi->tx_dma_desc = dmaengine_prep_slave_single(tspi->tx_dma_chan, tspi->tx_dma_phys, len, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); @@ -470,7 +470,7 @@ static int tegra_spi_start_tx_dma(struct tegra_spi_data *tspi, int len) static int tegra_spi_start_rx_dma(struct tegra_spi_data *tspi, int len) { - INIT_COMPLETION(tspi->rx_dma_complete); + reinit_completion(&tspi->rx_dma_complete); tspi->rx_dma_desc = 
dmaengine_prep_slave_single(tspi->rx_dma_chan, tspi->rx_dma_phys, len, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); @@ -844,7 +844,7 @@ static int tegra_spi_transfer_one_message(struct spi_master *master, list_for_each_entry(xfer, &msg->transfers, transfer_list) { unsigned long cmd1; - INIT_COMPLETION(tspi->xfer_completion); + reinit_completion(&tspi->xfer_completion); cmd1 = tegra_spi_setup_transfer_one(spi, xfer, is_first_msg); diff --git a/drivers/spi/spi-tegra20-sflash.c b/drivers/spi/spi-tegra20-sflash.c index 79be8ce..4dc8e81 100644 --- a/drivers/spi/spi-tegra20-sflash.c +++ b/drivers/spi/spi-tegra20-sflash.c @@ -339,7 +339,7 @@ static int tegra_sflash_transfer_one_message(struct spi_master *master, msg->actual_length = 0; single_xfer = list_is_singular(&msg->transfers); list_for_each_entry(xfer, &msg->transfers, transfer_list) { - INIT_COMPLETION(tsd->xfer_completion); + reinit_completion(&tsd->xfer_completion); ret = tegra_sflash_start_transfer_one(spi, xfer, is_first_msg, single_xfer); if (ret < 0) { diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index af0a678..e66715b 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -462,7 +462,7 @@ static void tegra_slink_dma_complete(void *args) static int tegra_slink_start_tx_dma(struct tegra_slink_data *tspi, int len) { - INIT_COMPLETION(tspi->tx_dma_complete); + reinit_completion(&tspi->tx_dma_complete); tspi->tx_dma_desc = dmaengine_prep_slave_single(tspi->tx_dma_chan, tspi->tx_dma_phys, len, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); @@ -481,7 +481,7 @@ static int tegra_slink_start_tx_dma(struct tegra_slink_data *tspi, int len) static int tegra_slink_start_rx_dma(struct tegra_slink_data *tspi, int len) { - INIT_COMPLETION(tspi->rx_dma_complete); + reinit_completion(&tspi->rx_dma_complete); tspi->rx_dma_desc = dmaengine_prep_slave_single(tspi->rx_dma_chan, tspi->rx_dma_phys, len, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 
@@ -836,7 +836,7 @@ static int tegra_slink_transfer_one(struct spi_master *master, struct tegra_slink_data *tspi = spi_master_get_devdata(master); int ret; - INIT_COMPLETION(tspi->xfer_completion); + reinit_completion(&tspi->xfer_completion); ret = tegra_slink_start_transfer_one(spi, xfer); if (ret < 0) { dev_err(tspi->dev, diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c index ec3a83f..6d4ce46 100644 --- a/drivers/spi/spi-xilinx.c +++ b/drivers/spi/spi-xilinx.c @@ -258,7 +258,7 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t) xspi->tx_ptr = t->tx_buf; xspi->rx_ptr = t->rx_buf; xspi->remaining_bytes = t->len; - INIT_COMPLETION(xspi->done); + reinit_completion(&xspi->done); /* Enable the transmit empty interrupt, which we use to determine diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 927998a..8d85ddc 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -571,7 +571,7 @@ static int spi_transfer_one_message(struct spi_master *master, list_for_each_entry(xfer, &msg->transfers, transfer_list) { trace_spi_transfer_start(msg, xfer); - INIT_COMPLETION(master->xfer_completion); + reinit_completion(&master->xfer_completion); ret = master->transfer_one(master, msg->spi, xfer); if (ret < 0) { diff --git a/drivers/staging/iio/adc/mxs-lradc.c b/drivers/staging/iio/adc/mxs-lradc.c index aeae76b..e2dd783 100644 --- a/drivers/staging/iio/adc/mxs-lradc.c +++ b/drivers/staging/iio/adc/mxs-lradc.c @@ -783,7 +783,7 @@ static int mxs_lradc_read_raw(struct iio_dev *iio_dev, if (!ret) return -EBUSY; - INIT_COMPLETION(lradc->completion); + reinit_completion(&lradc->completion); /* * No buffered operation in progress, map the channel and trigger it. 
diff --git a/drivers/staging/media/solo6x10/solo6x10-p2m.c b/drivers/staging/media/solo6x10/solo6x10-p2m.c index 3335941..7f2f247 100644 --- a/drivers/staging/media/solo6x10/solo6x10-p2m.c +++ b/drivers/staging/media/solo6x10/solo6x10-p2m.c @@ -87,7 +87,7 @@ int solo_p2m_dma_desc(struct solo_dev *solo_dev, if (mutex_lock_interruptible(&p2m_dev->mutex)) return -EINTR; - INIT_COMPLETION(p2m_dev->completion); + reinit_completion(&p2m_dev->completion); p2m_dev->error = 0; if (desc_cnt > 1 && solo_dev->type != SOLO_DEV_6110 && desc_mode) { diff --git a/drivers/staging/tidspbridge/core/sync.c b/drivers/staging/tidspbridge/core/sync.c index 7bb550a..743ff09 100644 --- a/drivers/staging/tidspbridge/core/sync.c +++ b/drivers/staging/tidspbridge/core/sync.c @@ -72,7 +72,7 @@ int sync_wait_on_multiple_events(struct sync_object **events, spin_lock_bh(&sync_lock); for (i = 0; i < count; i++) { if (completion_done(&events[i]->comp)) { - INIT_COMPLETION(events[i]->comp); + reinit_completion(&events[i]->comp); *index = i; spin_unlock_bh(&sync_lock); status = 0; @@ -92,7 +92,7 @@ int sync_wait_on_multiple_events(struct sync_object **events, spin_lock_bh(&sync_lock); for (i = 0; i < count; i++) { if (completion_done(&events[i]->comp)) { - INIT_COMPLETION(events[i]->comp); + reinit_completion(&events[i]->comp); *index = i; status = 0; } diff --git a/drivers/staging/tidspbridge/include/dspbridge/sync.h b/drivers/staging/tidspbridge/include/dspbridge/sync.h index 58a0d5c..fc19b97 100644 --- a/drivers/staging/tidspbridge/include/dspbridge/sync.h +++ b/drivers/staging/tidspbridge/include/dspbridge/sync.h @@ -59,7 +59,7 @@ static inline void sync_init_event(struct sync_object *event) static inline void sync_reset_event(struct sync_object *event) { - INIT_COMPLETION(event->comp); + reinit_completion(&event->comp); event->multi_comp = NULL; } diff --git a/drivers/staging/tidspbridge/rmgr/drv_interface.c b/drivers/staging/tidspbridge/rmgr/drv_interface.c index 6d04eb4..1aa4a3f 100644 --- 
a/drivers/staging/tidspbridge/rmgr/drv_interface.c +++ b/drivers/staging/tidspbridge/rmgr/drv_interface.c @@ -332,7 +332,7 @@ static void bridge_recover(struct work_struct *work) struct dev_object *dev; struct cfg_devnode *dev_node; if (atomic_read(&bridge_cref)) { - INIT_COMPLETION(bridge_comp); + reinit_completion(&bridge_comp); while (!wait_for_completion_timeout(&bridge_comp, msecs_to_jiffies(REC_TIMEOUT))) pr_info("%s:%d handle(s) still opened\n", @@ -348,7 +348,7 @@ static void bridge_recover(struct work_struct *work) void bridge_recover_schedule(void) { - INIT_COMPLETION(bridge_open_comp); + reinit_completion(&bridge_open_comp); recover = true; queue_work(bridge_rec_queue, &bridge_recovery_work); } @@ -389,7 +389,7 @@ static int omap3_bridge_startup(struct platform_device *pdev) #ifdef CONFIG_TIDSPBRIDGE_RECOVERY bridge_rec_queue = create_workqueue("bridge_rec_queue"); INIT_WORK(&bridge_recovery_work, bridge_recover); - INIT_COMPLETION(bridge_comp); + reinit_completion(&bridge_comp); #endif #ifdef CONFIG_PM diff --git a/drivers/tty/metag_da.c b/drivers/tty/metag_da.c index 0e888621..7332e2c 100644 --- a/drivers/tty/metag_da.c +++ b/drivers/tty/metag_da.c @@ -495,7 +495,7 @@ static int dashtty_write(struct tty_struct *tty, const unsigned char *buf, count = dport->xmit_cnt; /* xmit buffer no longer empty? */ if (count) - INIT_COMPLETION(dport->xmit_empty); + reinit_completion(&dport->xmit_empty); mutex_unlock(&dport->xmit_lock); if (total) { diff --git a/drivers/usb/c67x00/c67x00-sched.c b/drivers/usb/c67x00/c67x00-sched.c index aa49162..892cc96 100644 --- a/drivers/usb/c67x00/c67x00-sched.c +++ b/drivers/usb/c67x00/c67x00-sched.c @@ -344,7 +344,7 @@ void c67x00_endpoint_disable(struct usb_hcd *hcd, struct usb_host_endpoint *ep) /* it could happen that we reinitialize this completion, while * somebody was waiting for that completion. 
The timeout and * while loop handle such cases, but this might be improved */ - INIT_COMPLETION(c67x00->endpoint_disable); + reinit_completion(&c67x00->endpoint_disable); c67x00_sched_kick(c67x00); wait_for_completion_timeout(&c67x00->endpoint_disable, 1 * HZ); diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c index 44cf775..774e8b8 100644 --- a/drivers/usb/gadget/f_fs.c +++ b/drivers/usb/gadget/f_fs.c @@ -373,7 +373,7 @@ static int __ffs_ep0_queue_wait(struct ffs_data *ffs, char *data, size_t len) if (req->buf == NULL) req->buf = (void *)0xDEADBABE; - INIT_COMPLETION(ffs->ep0req_completion); + reinit_completion(&ffs->ep0req_completion); ret = usb_ep_queue(ffs->gadget->ep0, req, GFP_ATOMIC); if (unlikely(ret < 0)) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 84657e0..439c951 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -455,7 +455,7 @@ static int parport_prologue(struct parport *pp) return -1; } mos_parport->msg_pending = true; /* synch usb call pending */ - INIT_COMPLETION(mos_parport->syncmsg_compl); + reinit_completion(&mos_parport->syncmsg_compl); spin_unlock(&release_lock); mutex_lock(&mos_parport->serial->disc_mutex); diff --git a/drivers/video/exynos/exynos_mipi_dsi_common.c b/drivers/video/exynos/exynos_mipi_dsi_common.c index 7eed957..85edabf 100644 --- a/drivers/video/exynos/exynos_mipi_dsi_common.c +++ b/drivers/video/exynos/exynos_mipi_dsi_common.c @@ -220,7 +220,7 @@ int exynos_mipi_dsi_wr_data(struct mipi_dsim_device *dsim, unsigned int data_id, case MIPI_DSI_DCS_LONG_WRITE: { unsigned int size, payload = 0; - INIT_COMPLETION(dsim_wr_comp); + reinit_completion(&dsim_wr_comp); size = data_size * 4; @@ -356,7 +356,7 @@ int exynos_mipi_dsi_rd_data(struct mipi_dsim_device *dsim, unsigned int data_id, msleep(20); mutex_lock(&dsim->lock); - INIT_COMPLETION(dsim_rd_comp); + reinit_completion(&dsim_rd_comp); exynos_mipi_dsi_rd_tx_header(dsim, 
MIPI_DSI_SET_MAXIMUM_RETURN_PACKET_SIZE, req_size); diff --git a/drivers/video/omap2/displays-new/encoder-tpd12s015.c b/drivers/video/omap2/displays-new/encoder-tpd12s015.c index 798ef20..d5c936c 100644 --- a/drivers/video/omap2/displays-new/encoder-tpd12s015.c +++ b/drivers/video/omap2/displays-new/encoder-tpd12s015.c @@ -69,7 +69,7 @@ static int tpd_connect(struct omap_dss_device *dssdev, dst->src = dssdev; dssdev->dst = dst; - INIT_COMPLETION(ddata->hpd_completion); + reinit_completion(&ddata->hpd_completion); gpio_set_value_cansleep(ddata->ct_cp_hpd_gpio, 1); /* DC-DC converter needs at max 300us to get to 90% of 5V */ diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 000eae2..2f6735d 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -392,7 +392,7 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, wait_for_completion(&ecr->completion); rc = ecr->rc; - INIT_COMPLETION(ecr->completion); + reinit_completion(&ecr->completion); } out: ablkcipher_request_free(req); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c8e729d..74a7e12 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -244,7 +244,7 @@ static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl) set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state); spin_lock(&tbl->slot_tbl_lock); if (tbl->highest_used_slotid != NFS4_NO_SLOT) { - INIT_COMPLETION(tbl->complete); + reinit_completion(&tbl->complete); spin_unlock(&tbl->slot_tbl_lock); return wait_for_completion_interruptible(&tbl->complete); } diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 3a44a64..3407b2c 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1304,7 +1304,7 @@ static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) { wait_for_completion(&mw->mw_complete); /* Re-arm the completion in case we want to wait on it again */ - INIT_COMPLETION(mw->mw_complete); + reinit_completion(&mw->mw_complete); return mw->mw_status; } @@ -1355,7 +1355,7 @@ static int 
ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, else ret = mw->mw_status; /* Re-arm the completion in case we want to wait on it again */ - INIT_COMPLETION(mw->mw_complete); + reinit_completion(&mw->mw_complete); return ret; } diff --git a/sound/firewire/dice.c b/sound/firewire/dice.c index 6feee66..57bcd31 100644 --- a/sound/firewire/dice.c +++ b/sound/firewire/dice.c @@ -543,7 +543,7 @@ static int dice_change_rate(struct dice *dice, unsigned int clock_rate) __be32 value; int err; - INIT_COMPLETION(dice->clock_accepted); + reinit_completion(&dice->clock_accepted); value = cpu_to_be32(clock_rate | CLOCK_SOURCE_ARX1); err = snd_fw_transaction(dice->unit, TCODE_WRITE_QUADLET_REQUEST, diff --git a/sound/soc/samsung/ac97.c b/sound/soc/samsung/ac97.c index 2acf987..350ba23 100644 --- a/sound/soc/samsung/ac97.c +++ b/sound/soc/samsung/ac97.c @@ -74,7 +74,7 @@ static void s3c_ac97_activate(struct snd_ac97 *ac97) if (stat == S3C_AC97_GLBSTAT_MAINSTATE_ACTIVE) return; /* Return if already active */ - INIT_COMPLETION(s3c_ac97.done); + reinit_completion(&s3c_ac97.done); ac_glbctrl = readl(s3c_ac97.regs + S3C_AC97_GLBCTRL); ac_glbctrl = S3C_AC97_GLBCTRL_ACLINKON; @@ -103,7 +103,7 @@ static unsigned short s3c_ac97_read(struct snd_ac97 *ac97, s3c_ac97_activate(ac97); - INIT_COMPLETION(s3c_ac97.done); + reinit_completion(&s3c_ac97.done); ac_codec_cmd = readl(s3c_ac97.regs + S3C_AC97_CODEC_CMD); ac_codec_cmd = S3C_AC97_CODEC_CMD_READ | AC_CMD_ADDR(reg); @@ -140,7 +140,7 @@ static void s3c_ac97_write(struct snd_ac97 *ac97, unsigned short reg, s3c_ac97_activate(ac97); - INIT_COMPLETION(s3c_ac97.done); + reinit_completion(&s3c_ac97.done); ac_codec_cmd = readl(s3c_ac97.regs + S3C_AC97_CODEC_CMD); ac_codec_cmd = AC_CMD_ADDR(reg) | AC_CMD_DATA(val); -- cgit v0.10.2 From 62026aedaacedbe1ffe94a3599ad4acd8ecdf587 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 14 Nov 2013 14:32:03 -0800 Subject: sched: remove INIT_COMPLETION All users are converted over to 
reinit_completion(). Remove the old macro now. Signed-off-by: Wolfram Sang Cc: Linus Walleij Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/completion.h b/include/linux/completion.h index 124e4b4..5d5aaae 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -106,14 +106,4 @@ extern bool completion_done(struct completion *x); extern void complete(struct completion *); extern void complete_all(struct completion *); -/** - * INIT_COMPLETION - reinitialize a completion structure - * @x: completion structure to be reinitialized - * - * This macro should be used to reinitialize a completion structure so it can - * be reused. This is especially important after complete_all() is used. - */ -#define INIT_COMPLETION(x) ((x).done = 0) - - #endif -- cgit v0.10.2 From c853b167e6ec1f25023cfc58ba2f43f9f6f5b49b Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Thu, 14 Nov 2013 14:32:04 -0800 Subject: drivers/w1/masters/w1-gpio.c: use dev_get_platdata() Use the wrapper function for retrieving the platform data instead of accessing dev->platform_data directly. This is a cosmetic change to make the code simpler and enhance the readability. 
Signed-off-by: Jingoo Han Acked-by: Evgeniy Polyakov Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/w1/masters/w1-gpio.c b/drivers/w1/masters/w1-gpio.c index 264ad1c..e36b18b 100644 --- a/drivers/w1/masters/w1-gpio.c +++ b/drivers/w1/masters/w1-gpio.c @@ -56,7 +56,7 @@ MODULE_DEVICE_TABLE(of, w1_gpio_dt_ids); static int w1_gpio_probe_dt(struct platform_device *pdev) { - struct w1_gpio_platform_data *pdata = pdev->dev.platform_data; + struct w1_gpio_platform_data *pdata = dev_get_platdata(&pdev->dev); struct device_node *np = pdev->dev.of_node; int gpio; @@ -92,7 +92,7 @@ static int w1_gpio_probe(struct platform_device *pdev) } } - pdata = pdev->dev.platform_data; + pdata = dev_get_platdata(&pdev->dev); if (!pdata) { dev_err(&pdev->dev, "No configuration data\n"); @@ -154,7 +154,7 @@ static int w1_gpio_probe(struct platform_device *pdev) static int w1_gpio_remove(struct platform_device *pdev) { struct w1_bus_master *master = platform_get_drvdata(pdev); - struct w1_gpio_platform_data *pdata = pdev->dev.platform_data; + struct w1_gpio_platform_data *pdata = dev_get_platdata(&pdev->dev); if (pdata->enable_external_pullup) pdata->enable_external_pullup(0); @@ -171,7 +171,7 @@ static int w1_gpio_remove(struct platform_device *pdev) static int w1_gpio_suspend(struct platform_device *pdev, pm_message_t state) { - struct w1_gpio_platform_data *pdata = pdev->dev.platform_data; + struct w1_gpio_platform_data *pdata = dev_get_platdata(&pdev->dev); if (pdata->enable_external_pullup) pdata->enable_external_pullup(0); @@ -181,7 +181,7 @@ static int w1_gpio_suspend(struct platform_device *pdev, pm_message_t state) static int w1_gpio_resume(struct platform_device *pdev) { - struct w1_gpio_platform_data *pdata = pdev->dev.platform_data; + struct w1_gpio_platform_data *pdata = dev_get_platdata(&pdev->dev); if (pdata->enable_external_pullup) pdata->enable_external_pullup(1); -- cgit v0.10.2 From fc21c0cff2f425891b28ff6fb6b03b325c977428 Mon 
Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2013 14:32:06 -0800 Subject: revert "softirq: Add support for triggering softirq work on softirqs" This commit was incomplete in that code to remove items from the per-cpu lists was missing and never acquired a user in the 5 years it has been in the tree. We're going to implement what it seems to try to achieve in a simpler way, and this code is in the way of doing so. Signed-off-by: Christoph Hellwig Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c9e831d..db43b58 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -11,8 +11,6 @@ #include #include #include -#include -#include #include #include #include @@ -392,15 +390,6 @@ extern void __raise_softirq_irqoff(unsigned int nr); extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); -/* This is the worklist that queues up per-cpu softirq work. - * - * send_remote_sendirq() adds work to these lists, and - * the softirq handler itself dequeues from them. The queues - * are protected by disabling local cpu interrupts and they must - * only be accessed by the local cpu that they are for. - */ -DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list); - DECLARE_PER_CPU(struct task_struct *, ksoftirqd); static inline struct task_struct *this_cpu_ksoftirqd(void) @@ -408,17 +397,6 @@ static inline struct task_struct *this_cpu_ksoftirqd(void) return this_cpu_read(ksoftirqd); } -/* Try to send a softirq to a remote cpu. If this cannot be done, the - * work will be queued to the local cpu. - */ -extern void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq); - -/* Like send_remote_softirq(), but the caller must disable local cpu interrupts - * and compute the current cpu, passed in as 'this_cpu'.
- */ -extern void __send_remote_softirq(struct call_single_data *cp, int cpu, - int this_cpu, int softirq); - /* Tasklets --- multithreaded analogue of BHs. Main feature differing them of generic softirqs: tasklet diff --git a/kernel/softirq.c b/kernel/softirq.c index b249883..11025cc 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -6,8 +6,6 @@ * Distribute under GPLv2. * * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903) - * - * Remote softirq infrastructure is by Jens Axboe. */ #include @@ -627,146 +625,17 @@ void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer, } EXPORT_SYMBOL_GPL(tasklet_hrtimer_init); -/* - * Remote softirq bits - */ - -DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list); -EXPORT_PER_CPU_SYMBOL(softirq_work_list); - -static void __local_trigger(struct call_single_data *cp, int softirq) -{ - struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]); - - list_add_tail(&cp->list, head); - - /* Trigger the softirq only if the list was previously empty. 
*/ - if (head->next == &cp->list) - raise_softirq_irqoff(softirq); -} - -#ifdef CONFIG_USE_GENERIC_SMP_HELPERS -static void remote_softirq_receive(void *data) -{ - struct call_single_data *cp = data; - unsigned long flags; - int softirq; - - softirq = *(int *)cp->info; - local_irq_save(flags); - __local_trigger(cp, softirq); - local_irq_restore(flags); -} - -static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq) -{ - if (cpu_online(cpu)) { - cp->func = remote_softirq_receive; - cp->info = &softirq; - cp->flags = 0; - - __smp_call_function_single(cpu, cp, 0); - return 0; - } - return 1; -} -#else /* CONFIG_USE_GENERIC_SMP_HELPERS */ -static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq) -{ - return 1; -} -#endif - -/** - * __send_remote_softirq - try to schedule softirq work on a remote cpu - * @cp: private SMP call function data area - * @cpu: the remote cpu - * @this_cpu: the currently executing cpu - * @softirq: the softirq for the work - * - * Attempt to schedule softirq work on a remote cpu. If this cannot be - * done, the work is instead queued up on the local cpu. - * - * Interrupts must be disabled. - */ -void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq) -{ - if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq)) - __local_trigger(cp, softirq); -} -EXPORT_SYMBOL(__send_remote_softirq); - -/** - * send_remote_softirq - try to schedule softirq work on a remote cpu - * @cp: private SMP call function data area - * @cpu: the remote cpu - * @softirq: the softirq for the work - * - * Like __send_remote_softirq except that disabling interrupts and - * computing the current cpu is done for the caller. 
- */ -void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq) -{ - unsigned long flags; - int this_cpu; - - local_irq_save(flags); - this_cpu = smp_processor_id(); - __send_remote_softirq(cp, cpu, this_cpu, softirq); - local_irq_restore(flags); -} -EXPORT_SYMBOL(send_remote_softirq); - -static int remote_softirq_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - /* - * If a CPU goes away, splice its entries to the current CPU - * and trigger a run of the softirq - */ - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { - int cpu = (unsigned long) hcpu; - int i; - - local_irq_disable(); - for (i = 0; i < NR_SOFTIRQS; i++) { - struct list_head *head = &per_cpu(softirq_work_list[i], cpu); - struct list_head *local_head; - - if (list_empty(head)) - continue; - - local_head = &__get_cpu_var(softirq_work_list[i]); - list_splice_init(head, local_head); - raise_softirq_irqoff(i); - } - local_irq_enable(); - } - - return NOTIFY_OK; -} - -static struct notifier_block remote_softirq_cpu_notifier = { - .notifier_call = remote_softirq_cpu_notify, -}; - void __init softirq_init(void) { int cpu; for_each_possible_cpu(cpu) { - int i; - per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head; per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head; - for (i = 0; i < NR_SOFTIRQS; i++) - INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu)); } - register_hotcpu_notifier(&remote_softirq_cpu_notifier); - open_softirq(TASKLET_SOFTIRQ, tasklet_action); open_softirq(HI_SOFTIRQ, tasklet_hi_action); } -- cgit v0.10.2 From 0a06ff068f1255bcd7965ab07bc0f4adc3eb639a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2013 14:32:07 -0800 Subject: kernel: remove CONFIG_USE_GENERIC_SMP_HELPERS We've switched over every architecture that supports SMP to it, so remove the now useless config variable.
Signed-off-by: Christoph Hellwig Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/Kconfig b/arch/Kconfig index ded747c..f1cf895 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -207,9 +207,6 @@ config HAVE_DMA_ATTRS config HAVE_DMA_CONTIGUOUS bool -config USE_GENERIC_SMP_HELPERS - bool - config GENERIC_SMP_IDLE_THREAD bool diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 35a300d..8d2a483 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -522,7 +522,6 @@ config ARCH_MAY_HAVE_PC_FDC config SMP bool "Symmetric multi-processing support" depends on ALPHA_SABLE || ALPHA_LYNX || ALPHA_RAWHIDE || ALPHA_DP264 || ALPHA_WILDFIRE || ALPHA_TITAN || ALPHA_GENERIC || ALPHA_SHARK || ALPHA_MARVEL - select USE_GENERIC_SMP_HELPERS ---help--- This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. If diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 5ede546..2ee0c9b 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -125,7 +125,6 @@ config ARC_PLAT_NEEDS_CPU_TO_DMA config SMP bool "Symmetric Multi-Processing (Incomplete)" default n - select USE_GENERIC_SMP_HELPERS help This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. If diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 603d661..00c1ff4 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1432,7 +1432,6 @@ config SMP depends on GENERIC_CLOCKEVENTS depends on HAVE_SMP depends on MMU || ARM_MPU - select USE_GENERIC_SMP_HELPERS help This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. 
If diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index bb0bf1b..9714fe0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -143,7 +143,6 @@ config CPU_BIG_ENDIAN config SMP bool "Symmetric Multi-Processing" - select USE_GENERIC_SMP_HELPERS help This enables support for systems with more than one CPU. If you say N here, the kernel will run on single and diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index e887b57..9ceccef 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -34,7 +34,6 @@ config BLACKFIN select ARCH_WANT_IPC_PARSE_VERSION select GENERIC_ATOMIC64 select GENERIC_IRQ_PROBE - select USE_GENERIC_SMP_HELPERS if SMP select HAVE_NMI_WATCHDOG if NMI_WATCHDOG select GENERIC_SMP_IDLE_THREAD select ARCH_USES_GETTIMEOFFSET if !GENERIC_CLOCKEVENTS diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 99041b0..09df260 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -4,7 +4,6 @@ comment "Linux Kernel Configuration for Hexagon" config HEXAGON def_bool y select HAVE_OPROFILE - select USE_GENERIC_SMP_HELPERS if SMP # Other pending projects/to-do items. # select HAVE_REGS_AND_STACK_ACCESS_API # select HAVE_HW_BREAKPOINT if PERF_EVENTS diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 7740ab1..dfe85e9 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -343,7 +343,6 @@ config FORCE_MAX_ZONEORDER config SMP bool "Symmetric multi-processing support" - select USE_GENERIC_SMP_HELPERS help This enables support for systems with more than one CPU. If you have a system with only one CPU, say N. If you have a system with more diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 75661fb..09ef94a 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -275,7 +275,6 @@ source "kernel/Kconfig.preempt" config SMP bool "Symmetric multi-processing support" - select USE_GENERIC_SMP_HELPERS ---help--- This enables support for systems with more than one CPU. 
If you have a system with only one CPU, like most personal computers, say N. If diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig index 36368eb..e56abd2 100644 --- a/arch/metag/Kconfig +++ b/arch/metag/Kconfig @@ -111,7 +111,6 @@ config METAG_META21 config SMP bool "Symmetric multi-processing support" depends on METAG_META21 && METAG_META21_MMU - select USE_GENERIC_SMP_HELPERS help This enables support for systems with more than one thread running Linux. If you have a system with only one thread running Linux, diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 17cc7ff..867d7db 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2125,7 +2125,6 @@ source "mm/Kconfig" config SMP bool "Multi-Processing support" depends on SYS_SUPPORTS_SMP - select USE_GENERIC_SMP_HELPERS help This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. If diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 6aaa160..8bde923 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -181,7 +181,6 @@ endmenu config SMP bool "Symmetric multi-processing support" default y - select USE_GENERIC_SMP_HELPERS depends on MN10300_PROC_MN2WS0038 || MN10300_PROC_MN2WS0050 ---help--- This enables support for systems with more than one CPU. If you have diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 7dcde53..c03567a 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -226,7 +226,6 @@ endchoice config SMP bool "Symmetric multi-processing support" - select USE_GENERIC_SMP_HELPERS ---help--- This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. 
If diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2f898d6..4740b0a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -106,7 +106,6 @@ config PPC select HAVE_MEMBLOCK_NODE_MAP select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG - select USE_GENERIC_SMP_HELPERS if SMP select HAVE_OPROFILE select HAVE_DEBUG_KMEMLEAK select GENERIC_ATOMIC64 if PPC32 diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f75d7e5..314fced 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -141,7 +141,6 @@ config S390 select OLD_SIGACTION select OLD_SIGSUSPEND3 select SYSCTL_EXCEPTION_TRACE - select USE_GENERIC_SMP_HELPERS if SMP select VIRT_CPU_ACCOUNTING select VIRT_TO_BUS diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 224f4bc..e78561b 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -711,7 +711,6 @@ config CC_STACKPROTECTOR config SMP bool "Symmetric multi-processing support" depends on SYS_SUPPORTS_SMP - select USE_GENERIC_SMP_HELPERS ---help--- This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. 
If diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 78c4fdb..8591b20 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -28,7 +28,6 @@ config SPARC select HAVE_ARCH_JUMP_LABEL select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION - select USE_GENERIC_SMP_HELPERS if SMP select GENERIC_PCI_IOMAP select HAVE_NMI_WATCHDOG if SPARC64 select HAVE_BPF_JIT diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index d45a2c4..b3692ce 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -8,7 +8,6 @@ config TILE select HAVE_KVM if !TILEGX select GENERIC_FIND_FIRST_BIT select SYSCTL_EXCEPTION_TRACE - select USE_GENERIC_SMP_HELPERS select CC_OPTIMIZE_FOR_SIZE select HAVE_DEBUG_KMEMLEAK select GENERIC_IRQ_PROBE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index af5513e..83f521a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -90,7 +90,6 @@ config X86 select GENERIC_IRQ_SHOW select GENERIC_CLOCKEVENTS_MIN_ADJUST select IRQ_FORCED_THREADING - select USE_GENERIC_SMP_HELPERS if SMP select HAVE_BPF_JIT if X86_64 select HAVE_ARCH_TRANSPARENT_HUGEPAGE select CLKEVT_I8253 diff --git a/block/blk-mq.c b/block/blk-mq.c index 88d4e86..c661896 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -319,7 +319,7 @@ void __blk_mq_end_io(struct request *rq, int error) blk_mq_complete_request(rq, error); } -#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) +#if defined(CONFIG_SMP) /* * Called with interrupts disabled. 
@@ -361,7 +361,7 @@ static int ipi_remote_cpu(struct blk_mq_ctx *ctx, const int cpu, return true; } -#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */ +#else /* CONFIG_SMP */ static int ipi_remote_cpu(struct blk_mq_ctx *ctx, const int cpu, struct request *rq, const int error) { diff --git a/block/blk-softirq.c b/block/blk-softirq.c index ce4b8bf..57790c1 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -36,7 +36,7 @@ static void blk_done_softirq(struct softirq_action *h) } } -#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) +#ifdef CONFIG_SMP static void trigger_softirq(void *data) { struct request *rq = data; @@ -71,7 +71,7 @@ static int raise_blk_irq(int cpu, struct request *rq) return 1; } -#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */ +#else /* CONFIG_SMP */ static int raise_blk_irq(int cpu, struct request *rq) { return 1; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 4f8c4d9..9777952 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -288,7 +288,7 @@ static ssize_t queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) { ssize_t ret = -EINVAL; -#if defined(CONFIG_USE_GENERIC_SMP_HELPERS) +#ifdef CONFIG_SMP unsigned long val; ret = queue_var_store(&val, page, count); diff --git a/include/linux/smp.h b/include/linux/smp.h index 731f523..7885151 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -106,14 +106,10 @@ void kick_all_cpus_sync(void); /* * Generic and arch helpers */ -#ifdef CONFIG_USE_GENERIC_SMP_HELPERS void __init call_function_init(void); void generic_smp_call_function_single_interrupt(void); #define generic_smp_call_function_interrupt \ generic_smp_call_function_single_interrupt -#else -static inline void call_function_init(void) { } -#endif /* * Mark the boot cpu "online" so that it can call console drivers in diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz index 94fabd5..2a202a8 100644 --- a/kernel/Kconfig.hz +++ b/kernel/Kconfig.hz @@ -55,4 
+55,4 @@ config HZ default 1000 if HZ_1000 config SCHED_HRTICK - def_bool HIGH_RES_TIMERS && (!SMP || USE_GENERIC_SMP_HELPERS) + def_bool HIGH_RES_TIMERS diff --git a/kernel/smp.c b/kernel/smp.c index 4611610..1c194e2 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -15,7 +15,6 @@ #include "smpboot.h" -#ifdef CONFIG_USE_GENERIC_SMP_HELPERS enum { CSD_FLAG_LOCK = 0x01, CSD_FLAG_WAIT = 0x02, @@ -464,7 +463,6 @@ int smp_call_function(smp_call_func_t func, void *info, int wait) return 0; } EXPORT_SYMBOL(smp_call_function); -#endif /* USE_GENERIC_SMP_HELPERS */ /* Setup configured maximum number of CPUs to activate */ unsigned int setup_max_cpus = NR_CPUS; -- cgit v0.10.2 From 40c01e8bd5575e32633192513e09eac7155d6926 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2013 14:32:08 -0800 Subject: kernel: provide a __smp_call_function_single stub for !CONFIG_SMP Signed-off-by: Christoph Hellwig Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/up.c b/kernel/up.c index 630d72b..509403e 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -22,6 +22,17 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, } EXPORT_SYMBOL(smp_call_function_single); +void __smp_call_function_single(int cpu, struct call_single_data *csd, + int wait) +{ + unsigned long flags; + + local_irq_save(flags); + csd->func(csd->info); + local_irq_restore(flags); +} +EXPORT_SYMBOL(__smp_call_function_single); + int on_each_cpu(smp_call_func_t func, void *info, int wait) { unsigned long flags; -- cgit v0.10.2 From 7cf64f861b7a43b395f0855994003254b06a7e5a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 14 Nov 2013 14:32:09 -0800 Subject: kernel-provide-a-__smp_call_function_single-stub-for-config_smp-fix x86_64 allnoconfig: kernel/up.c:25: error: redefinition of '__smp_call_function_single' include/linux/smp.h:154: note: previous definition of '__smp_call_function_single' was here Cc: Christoph Hellwig Cc: 
Christoph Hellwig Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/smp.h b/include/linux/smp.h index 7885151..5da22ee 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -49,6 +49,9 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), smp_call_func_t func, void *info, bool wait, gfp_t gfp_flags); +void __smp_call_function_single(int cpuid, struct call_single_data *data, + int wait); + #ifdef CONFIG_SMP #include @@ -95,9 +98,6 @@ int smp_call_function(smp_call_func_t func, void *info, int wait); void smp_call_function_many(const struct cpumask *mask, smp_call_func_t func, void *info, bool wait); -void __smp_call_function_single(int cpuid, struct call_single_data *data, - int wait); - int smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, void *info, int wait); @@ -151,12 +151,6 @@ smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, static inline void kick_all_cpus_sync(void) { } -static inline void __smp_call_function_single(int cpuid, - struct call_single_data *data, int wait) -{ - on_each_cpu(data->func, data->info, wait); -} - #endif /* !SMP */ /* -- cgit v0.10.2 From ca5ecd64c2cdbcd316d789467147e732746f39fa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2013 14:32:10 -0800 Subject: kernel: fix generic_exec_single indentation Signed-off-by: Christoph Hellwig Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/smp.c b/kernel/smp.c index 1c194e2..bd9f940 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -139,8 +139,7 @@ static void csd_unlock(struct call_single_data *csd) * for execution on the given CPU. data must already have * ->func, ->info, and ->flags set. 
*/ -static -void generic_exec_single(int cpu, struct call_single_data *csd, int wait) +static void generic_exec_single(int cpu, struct call_single_data *csd, int wait) { struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); unsigned long flags; -- cgit v0.10.2 From b89241e8cdb8321c20546d47645a9b65b58113b5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2013 14:32:11 -0800 Subject: llists: move llist_reverse_order from raid5 to llist.c Make this useful helper available for other users. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Cc: Jens Axboe Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f8b9068..7f0e17a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -293,20 +293,6 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh) do_release_stripe(conf, sh); } -static struct llist_node *llist_reverse_order(struct llist_node *head) -{ - struct llist_node *new_head = NULL; - - while (head) { - struct llist_node *tmp = head; - head = head->next; - tmp->next = new_head; - new_head = tmp; - } - - return new_head; -} - /* should hold conf->device_lock already */ static int release_stripe_list(struct r5conf *conf) { diff --git a/include/linux/llist.h b/include/linux/llist.h index 8828a78..fbf10a0 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -195,4 +195,6 @@ static inline struct llist_node *llist_del_all(struct llist_head *head) extern struct llist_node *llist_del_first(struct llist_head *head); +struct llist_node *llist_reverse_order(struct llist_node *head); + #endif /* LLIST_H */ diff --git a/lib/llist.c b/lib/llist.c index 4a70d12..ef48b87 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -81,3 +81,25 @@ struct llist_node *llist_del_first(struct llist_head *head) return entry; } EXPORT_SYMBOL_GPL(llist_del_first); + +/** + * llist_reverse_order - reverse order of a llist chain + * @head: first item of 
the list to be reversed + * + * Reverse the oder of a chain of llist entries and return the + * new first entry. + */ +struct llist_node *llist_reverse_order(struct llist_node *head) +{ + struct llist_node *new_head = NULL; + + while (head) { + struct llist_node *tmp = head; + head = head->next; + tmp->next = new_head; + new_head = tmp; + } + + return new_head; +} +EXPORT_SYMBOL_GPL(llist_reverse_order); -- cgit v0.10.2 From 0791a6057cb60d12ec5e3182b99e6ffa8044ee3a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 14 Nov 2013 14:32:13 -0800 Subject: llists-move-llist_reverse_order-from-raid5-to-llistc-fix fix comment typo, per Jan Cc: Christoph Hellwig Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/llist.c b/lib/llist.c index ef48b87..f76196d 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -86,7 +86,7 @@ EXPORT_SYMBOL_GPL(llist_del_first); * llist_reverse_order - reverse order of a llist chain * @head: first item of the list to be reversed * - * Reverse the oder of a chain of llist entries and return the + * Reverse the order of a chain of llist entries and return the * new first entry. */ struct llist_node *llist_reverse_order(struct llist_node *head) -- cgit v0.10.2 From 07968fe4acec6d49e39520ef407ea7f6874b7c2e Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 14 Nov 2013 14:32:15 -0800 Subject: sound/core/memalloc.c: use gen_pool_dma_alloc() to allocate iram buffer Now that gen_pool_dma_alloc() has been introduced, use it to simplify the code.
Signed-off-by: Nicolin Chen Acked-by: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 9d93f02..5e1c7bc 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -184,11 +184,7 @@ static void snd_malloc_dev_iram(struct snd_dma_buffer *dmab, size_t size) /* Assign the pool into private_data field */ dmab->private_data = pool; - dmab->area = (void *)gen_pool_alloc(pool, size); - if (!dmab->area) - return; - - dmab->addr = gen_pool_virt_to_phys(pool, (unsigned long)dmab->area); + dmab->area = gen_pool_dma_alloc(pool, size, &dmab->addr); } /** -- cgit v0.10.2 From a019e48cfbfb358786326db3dbc1c565b8f14a56 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Thu, 14 Nov 2013 14:32:16 -0800 Subject: kfifo: kfifo_copy_{to,from}_user: fix copied bytes calculation 'copied' and 'len' are in bytes, while 'ret' is in elements, so we need to multiply 'ret' by the size of one element to get the correct result.
Signed-off-by: Lars-Peter Clausen Cc: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/kfifo.c b/lib/kfifo.c index 7b7f830..d79b9d2 100644 --- a/lib/kfifo.c +++ b/lib/kfifo.c @@ -215,7 +215,7 @@ static unsigned long kfifo_copy_from_user(struct __kfifo *fifo, * incrementing the fifo->in index counter */ smp_wmb(); - *copied = len - ret; + *copied = len - ret * esize; /* return the number of elements which are not copied */ return ret; } @@ -275,7 +275,7 @@ static unsigned long kfifo_copy_to_user(struct __kfifo *fifo, void __user *to, * incrementing the fifo->out index counter */ smp_wmb(); - *copied = len - ret; + *copied = len - ret * esize; /* return the number of elements which are not copied */ return ret; } -- cgit v0.10.2 From 498d319bb512992ef0784c278fa03679f2f5649d Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Thu, 14 Nov 2013 14:32:17 -0800 Subject: kfifo API type safety This patch enhances the type safety for the kfifo API. It is now safe to put const data into a non const FIFO and the API will now generate a compiler warning when reading from the fifo where the destination address is pointing to a const variable. As a side effect the kfifo_put() does now expect the value of an element instead of a pointer to the element. This was suggested by Russell King. It makes the handling of the kfifo_put easier since there is no need to create a helper variable for getting the address of a pointer or to pass integers of different sizes. IMHO the API break is okay, since there are currently only six users of kfifo_put(). The code is also cleaner by kicking out the "if (0)" expressions.
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Stefani Seibold Cc: Russell King Cc: Hauke Mehrtens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/gpu/drm/drm_flip_work.c b/drivers/gpu/drm/drm_flip_work.c index e788882..f9c7fa3 100644 --- a/drivers/gpu/drm/drm_flip_work.c +++ b/drivers/gpu/drm/drm_flip_work.c @@ -34,7 +34,7 @@ */ void drm_flip_work_queue(struct drm_flip_work *work, void *val) { - if (kfifo_put(&work->fifo, (const void **)&val)) { + if (kfifo_put(&work->fifo, val)) { atomic_inc(&work->pending); } else { DRM_ERROR("%s fifo full!\n", work->name); diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c index dac15b9..c10eab6 100644 --- a/drivers/iio/industrialio-event.c +++ b/drivers/iio/industrialio-event.c @@ -56,7 +56,7 @@ int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp) ev.id = ev_code; ev.timestamp = timestamp; - copied = kfifo_put(&ev_int->det_events, &ev); + copied = kfifo_put(&ev_int->det_events, ev); if (copied != 0) wake_up_locked_poll(&ev_int->wait, POLLIN); } diff --git a/drivers/net/wireless/rt2x00/rt2800mmio.c b/drivers/net/wireless/rt2x00/rt2800mmio.c index ae15228..a8cc736 100644 --- a/drivers/net/wireless/rt2x00/rt2800mmio.c +++ b/drivers/net/wireless/rt2x00/rt2800mmio.c @@ -446,7 +446,7 @@ static void rt2800mmio_txstatus_interrupt(struct rt2x00_dev *rt2x00dev) if (!rt2x00_get_field32(status, TX_STA_FIFO_VALID)) break; - if (!kfifo_put(&rt2x00dev->txstatus_fifo, &status)) { + if (!kfifo_put(&rt2x00dev->txstatus_fifo, status)) { rt2x00_warn(rt2x00dev, "TX status FIFO overrun, drop tx status report\n"); break; } diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c index 997df03..a81ceb6 100644 --- a/drivers/net/wireless/rt2x00/rt2800usb.c +++ b/drivers/net/wireless/rt2x00/rt2800usb.c @@ -164,7 +164,7 @@ static bool rt2800usb_tx_sta_fifo_read_completed(struct rt2x00_dev *rt2x00dev, valid = 
rt2x00_get_field32(tx_status, TX_STA_FIFO_VALID); if (valid) { - if (!kfifo_put(&rt2x00dev->txstatus_fifo, &tx_status)) + if (!kfifo_put(&rt2x00dev->txstatus_fifo, tx_status)) rt2x00_warn(rt2x00dev, "TX status FIFO overrun\n"); queue_work(rt2x00dev->workqueue, &rt2x00dev->txdone_work); diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 85ca36f..6b3a958 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -574,7 +574,7 @@ void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, }; spin_lock_irqsave(&aer_recover_ring_lock, flags); - if (kfifo_put(&aer_recover_ring, &entry)) + if (kfifo_put(&aer_recover_ring, entry)) schedule_work(&aer_recover_work); else pr_err("AER recover: Buffer overflow when recovering AER for %04x:%02x:%02x:%x\n", diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 10308c6..552d51e 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -1,7 +1,7 @@ /* * A generic kernel FIFO implementation * - * Copyright (C) 2009/2010 Stefani Seibold + * Copyright (C) 2013 Stefani Seibold * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -67,9 +67,10 @@ struct __kfifo { union { \ struct __kfifo kfifo; \ datatype *type; \ + const datatype *const_type; \ char (*rectype)[recsize]; \ ptrtype *ptr; \ - const ptrtype *ptr_const; \ + ptrtype const *ptr_const; \ } #define __STRUCT_KFIFO(type, size, recsize, ptrtype) \ @@ -386,16 +387,12 @@ __kfifo_int_must_check_helper( \ #define kfifo_put(fifo, val) \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ - typeof((val) + 1) __val = (val); \ + typeof(*__tmp->const_type) __val = (val); \ unsigned int __ret; \ - const size_t __recsize = sizeof(*__tmp->rectype); \ + size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ - if (0) { \ - typeof(__tmp->ptr_const) __dummy __attribute__ 
((unused)); \ - __dummy = (typeof(__val))NULL; \ - } \ if (__recsize) \ - __ret = __kfifo_in_r(__kfifo, __val, sizeof(*__val), \ + __ret = __kfifo_in_r(__kfifo, &__val, sizeof(__val), \ __recsize); \ else { \ __ret = !kfifo_is_full(__tmp); \ @@ -404,7 +401,7 @@ __kfifo_int_must_check_helper( \ ((typeof(__tmp->type))__kfifo->data) : \ (__tmp->buf) \ )[__kfifo->in & __tmp->kfifo.mask] = \ - *(typeof(__tmp->type))__val; \ + (typeof(*__tmp->type))__val; \ smp_wmb(); \ __kfifo->in++; \ } \ @@ -415,7 +412,7 @@ __kfifo_int_must_check_helper( \ /** * kfifo_get - get data from the fifo * @fifo: address of the fifo to be used - * @val: the var where to store the data to be added + * @val: address where to store the data * * This macro reads the data from the fifo. * It returns 0 if the fifo was empty. Otherwise it returns the number @@ -428,12 +425,10 @@ __kfifo_int_must_check_helper( \ __kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ - typeof((val) + 1) __val = (val); \ + typeof(__tmp->ptr) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ - if (0) \ - __val = (typeof(__tmp->ptr))0; \ if (__recsize) \ __ret = __kfifo_out_r(__kfifo, __val, sizeof(*__val), \ __recsize); \ @@ -456,7 +451,7 @@ __kfifo_uint_must_check_helper( \ /** * kfifo_peek - get data from the fifo without removing * @fifo: address of the fifo to be used - * @val: the var where to store the data to be added + * @val: address where to store the data * * This reads the data from the fifo without removing it from the fifo. * It returns 0 if the fifo was empty. 
Otherwise it returns the number @@ -469,12 +464,10 @@ __kfifo_uint_must_check_helper( \ __kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ - typeof((val) + 1) __val = (val); \ + typeof(__tmp->ptr) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ - if (0) \ - __val = (typeof(__tmp->ptr))NULL; \ if (__recsize) \ __ret = __kfifo_out_peek_r(__kfifo, __val, sizeof(*__val), \ __recsize); \ @@ -508,14 +501,10 @@ __kfifo_uint_must_check_helper( \ #define kfifo_in(fifo, buf, n) \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ - typeof((buf) + 1) __buf = (buf); \ + typeof(__tmp->ptr_const) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ - if (0) { \ - typeof(__tmp->ptr_const) __dummy __attribute__ ((unused)); \ - __dummy = (typeof(__buf))NULL; \ - } \ (__recsize) ?\ __kfifo_in_r(__kfifo, __buf, __n, __recsize) : \ __kfifo_in(__kfifo, __buf, __n); \ @@ -561,14 +550,10 @@ __kfifo_uint_must_check_helper( \ __kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ - typeof((buf) + 1) __buf = (buf); \ + typeof(__tmp->ptr) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ - if (0) { \ - typeof(__tmp->ptr) __dummy = NULL; \ - __buf = __dummy; \ - } \ (__recsize) ?\ __kfifo_out_r(__kfifo, __buf, __n, __recsize) : \ __kfifo_out(__kfifo, __buf, __n); \ @@ -773,14 +758,10 @@ __kfifo_uint_must_check_helper( \ __kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ - typeof((buf) + 1) __buf = (buf); \ + typeof(__tmp->ptr) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ - if (0) { \ - typeof(__tmp->ptr) __dummy __attribute__ ((unused)) = NULL; \ - __buf = __dummy; \ - } \ 
(__recsize) ? \ __kfifo_out_peek_r(__kfifo, __buf, __n, __recsize) : \ __kfifo_out_peek(__kfifo, __buf, __n); \ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index f9d78ec..b7c1716 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1269,7 +1269,7 @@ void memory_failure_queue(unsigned long pfn, int trapno, int flags) mf_cpu = &get_cpu_var(memory_failure_cpu); spin_lock_irqsave(&mf_cpu->lock, proc_flags); - if (kfifo_put(&mf_cpu->fifo, &entry)) + if (kfifo_put(&mf_cpu->fifo, entry)) schedule_work_on(smp_processor_id(), &mf_cpu->work); else pr_err("Memory failure: buffer overflow when queuing memory failure at %#lx\n", diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c index cfe40ad..2fca916 100644 --- a/samples/kfifo/bytestream-example.c +++ b/samples/kfifo/bytestream-example.c @@ -64,7 +64,7 @@ static int __init testfunc(void) /* put values into the fifo */ for (i = 0; i != 10; i++) - kfifo_put(&test, &i); + kfifo_put(&test, i); /* show the number of used elements */ printk(KERN_INFO "fifo len: %u\n", kfifo_len(&test)); @@ -85,7 +85,7 @@ static int __init testfunc(void) kfifo_skip(&test); /* put values into the fifo until is full */ - for (i = 20; kfifo_put(&test, &i); i++) + for (i = 20; kfifo_put(&test, i); i++) ; printk(KERN_INFO "queue len: %u\n", kfifo_len(&test)); diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c index 0647379..aa243db 100644 --- a/samples/kfifo/dma-example.c +++ b/samples/kfifo/dma-example.c @@ -39,7 +39,7 @@ static int __init example_init(void) kfifo_in(&fifo, "test", 4); for (i = 0; i != 9; i++) - kfifo_put(&fifo, &i); + kfifo_put(&fifo, i); /* kick away first byte */ kfifo_skip(&fifo); diff --git a/samples/kfifo/inttype-example.c b/samples/kfifo/inttype-example.c index 6f8e79e..8dc3c2e 100644 --- a/samples/kfifo/inttype-example.c +++ b/samples/kfifo/inttype-example.c @@ -61,7 +61,7 @@ static int __init testfunc(void) /* put values into the fifo */ for (i = 0; i != 
10; i++) - kfifo_put(&test, &i); + kfifo_put(&test, i); /* show the number of used elements */ printk(KERN_INFO "fifo len: %u\n", kfifo_len(&test)); @@ -78,7 +78,7 @@ static int __init testfunc(void) kfifo_skip(&test); /* put values into the fifo until is full */ - for (i = 20; kfifo_put(&test, &i); i++) + for (i = 20; kfifo_put(&test, i); i++) ; printk(KERN_INFO "queue len: %u\n", kfifo_len(&test)); -- cgit v0.10.2 From a99b7069aab8fc3fb4f26d15795dc280b52e38b1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 14 Nov 2013 14:32:18 -0800 Subject: hfsplus: Fix undefined __divdi3 in hfsplus_init_header_node() ERROR: "__divdi3" [fs/hfsplus/hfsplus.ko] undefined! Introduced by commit 099e9245e04d ("hfsplus: implement attributes file's header node initialization code"). i_size_read() returns loff_t, which is long long, i.e. 64-bit. node_size is size_t, which is either 32-bit or 64-bit. Hence "i_size_read(attr_file) / node_size" is a 64-by-32 or 64-by-64 division, causing (some versions of) gcc to emit a call to __divdi3(). Fortunately node_size is actually 16-bit, as the sole caller of hfsplus_init_header_node() passes a u16. Hence change its type from size_t to u16, and use do_div() to perform a 64-by-32 division. Not seen in m68k/allmodconfig in -next, so it really depends on the version of gcc.
Signed-off-by: Geert Uytterhoeven Cc: Vyacheslav Dubeyko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index efc85b1..3c6136f 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -129,7 +129,7 @@ static int can_set_xattr(struct inode *inode, const char *name, static void hfsplus_init_header_node(struct inode *attr_file, u32 clump_size, - char *buf, size_t node_size) + char *buf, u16 node_size) { struct hfs_bnode_desc *desc; struct hfs_btree_header_rec *head; @@ -139,8 +139,9 @@ static void hfsplus_init_header_node(struct inode *attr_file, char *bmp; u32 used_nodes; u32 used_bmp_bytes; + loff_t tmp; - hfs_dbg(ATTR_MOD, "init_hdr_attr_file: clump %u, node_size %zu\n", + hfs_dbg(ATTR_MOD, "init_hdr_attr_file: clump %u, node_size %u\n", clump_size, node_size); /* The end of the node contains list of record offsets */ @@ -154,7 +155,9 @@ static void hfsplus_init_header_node(struct inode *attr_file, head = (struct hfs_btree_header_rec *)(buf + offset); head->node_size = cpu_to_be16(node_size); - head->node_count = cpu_to_be32(i_size_read(attr_file) / node_size); + tmp = i_size_read(attr_file); + do_div(tmp, node_size); + head->node_count = cpu_to_be32(tmp); head->free_nodes = cpu_to_be32(be32_to_cpu(head->node_count) - 1); head->clump_size = cpu_to_be32(clump_size); head->attributes |= cpu_to_be32(HFS_TREE_BIGKEYS | HFS_TREE_VARIDXKEYS); -- cgit v0.10.2 From 8d3ef556aba2b5b7d8b7144f7be1814d75ea3cc6 Mon Sep 17 00:00:00 2001 From: Alexander Beregalov Date: Thu, 14 Nov 2013 14:32:19 -0800 Subject: cmdline-parser: fix build Fix following errors: include/linux/cmdline-parser.h:17:12: error: 'BDEVNAME_SIZE' undeclared here block/cmdline-parser.c:17:2: error: implicit declaration of function 'kzalloc' Signed-off-by: Alexander Beregalov Cc: CaiZhiyong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/cmdline-parser.h b/include/linux/cmdline-parser.h index 98e892e..a0f9280 
100644 --- a/include/linux/cmdline-parser.h +++ b/include/linux/cmdline-parser.h @@ -8,6 +8,8 @@ #define CMDLINEPARSEH #include <linux/blkdev.h> +#include <linux/fs.h> +#include <linux/slab.h> /* partition flags */ #define PF_RDONLY 0x01 /* Device is read only */ -- cgit v0.10.2