diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2014-11-03 11:06:21 (GMT) |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2014-11-03 11:06:21 (GMT) |
commit | 123b2dd10b4659911ef38458573a57f94cbf5448 (patch) | |
tree | 536765a44b6fb25f024a8387f4677f994015b041 /mm | |
parent | f62c95fd4041d669159dd76ac0bb2a7f86b5b05d (diff) | |
parent | 0df1f2487d2f0d04703f142813d53615d62a1da4 (diff) | |
download | linux-123b2dd10b4659911ef38458573a57f94cbf5448.tar.xz |
Merge remote-tracking branch 'origin/master' into HEAD
Several important fixes went in between 3.18-rc1 and 3.18-rc3, so
KVM/x86 work for 3.19 will be based on 3.18-rc3.
Diffstat (limited to 'mm')
-rw-r--r-- | mm/balloon_compaction.c | 2 | ||||
-rw-r--r-- | mm/compaction.c | 3 | ||||
-rw-r--r-- | mm/huge_memory.c | 15 | ||||
-rw-r--r-- | mm/memcontrol.c | 105 | ||||
-rw-r--r-- | mm/memory.c | 1 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 5 | ||||
-rw-r--r-- | mm/mmap.c | 8 | ||||
-rw-r--r-- | mm/page-writeback.c | 43 | ||||
-rw-r--r-- | mm/page_cgroup.c | 1 | ||||
-rw-r--r-- | mm/rmap.c | 88 | ||||
-rw-r--r-- | mm/slab_common.c | 10 |
11 files changed, 142 insertions, 139 deletions
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index b3cbe19..fcad832 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -68,11 +68,13 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info) * to be released by the balloon driver. */ if (trylock_page(page)) { +#ifdef CONFIG_BALLOON_COMPACTION if (!PagePrivate(page)) { /* raced with isolation */ unlock_page(page); continue; } +#endif spin_lock_irqsave(&b_dev_info->pages_lock, flags); balloon_page_delete(page); __count_vm_event(BALLOON_DEFLATE); diff --git a/mm/compaction.c b/mm/compaction.c index edba18a..ec74cf0 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -784,6 +784,9 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, cc->nr_migratepages = 0; break; } + + if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) + break; } acct_isolated(cc->zone, cc); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 74c78aa..de98415 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -200,7 +200,7 @@ retry: preempt_disable(); if (cmpxchg(&huge_zero_page, NULL, zero_page)) { preempt_enable(); - __free_page(zero_page); + __free_pages(zero_page, compound_order(zero_page)); goto retry; } @@ -232,7 +232,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink, if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { struct page *zero_page = xchg(&huge_zero_page, NULL); BUG_ON(zero_page == NULL); - __free_page(zero_page); + __free_pages(zero_page, compound_order(zero_page)); return HPAGE_PMD_NR; } @@ -803,7 +803,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, return VM_FAULT_FALLBACK; if (unlikely(anon_vma_prepare(vma))) return VM_FAULT_OOM; - if (unlikely(khugepaged_enter(vma))) + if (unlikely(khugepaged_enter(vma, vma->vm_flags))) return VM_FAULT_OOM; if (!(flags & FAULT_FLAG_WRITE) && transparent_hugepage_use_zero_page()) { @@ -1970,7 +1970,7 @@ int hugepage_madvise(struct 
vm_area_struct *vma, * register it here without waiting a page fault that * may not happen any time soon. */ - if (unlikely(khugepaged_enter_vma_merge(vma))) + if (unlikely(khugepaged_enter_vma_merge(vma, *vm_flags))) return -ENOMEM; break; case MADV_NOHUGEPAGE: @@ -2071,7 +2071,8 @@ int __khugepaged_enter(struct mm_struct *mm) return 0; } -int khugepaged_enter_vma_merge(struct vm_area_struct *vma) +int khugepaged_enter_vma_merge(struct vm_area_struct *vma, + unsigned long vm_flags) { unsigned long hstart, hend; if (!vma->anon_vma) @@ -2083,11 +2084,11 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma) if (vma->vm_ops) /* khugepaged not yet working on file or special mappings */ return 0; - VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma); + VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma); hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; hend = vma->vm_end & HPAGE_PMD_MASK; if (hstart < hend) - return khugepaged_enter(vma); + return khugepaged_enter(vma, vm_flags); return 0; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 23976fd..d6ac0e3 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1536,12 +1536,8 @@ int mem_cgroup_swappiness(struct mem_cgroup *memcg) * start move here. */ -/* for quick checking without looking up memcg */ -atomic_t memcg_moving __read_mostly; - static void mem_cgroup_start_move(struct mem_cgroup *memcg) { - atomic_inc(&memcg_moving); atomic_inc(&memcg->moving_account); synchronize_rcu(); } @@ -1552,10 +1548,8 @@ static void mem_cgroup_end_move(struct mem_cgroup *memcg) * Now, mem_cgroup_clear_mc() may call this function with NULL. * We check NULL in callee rather than caller. */ - if (memcg) { - atomic_dec(&memcg_moving); + if (memcg) atomic_dec(&memcg->moving_account); - } } /* @@ -2204,41 +2198,52 @@ cleanup: return true; } -/* - * Used to update mapped file or writeback or other statistics. 
+/** + * mem_cgroup_begin_page_stat - begin a page state statistics transaction + * @page: page that is going to change accounted state + * @locked: &memcg->move_lock slowpath was taken + * @flags: IRQ-state flags for &memcg->move_lock * - * Notes: Race condition + * This function must mark the beginning of an accounted page state + * change to prevent double accounting when the page is concurrently + * being moved to another memcg: * - * Charging occurs during page instantiation, while the page is - * unmapped and locked in page migration, or while the page table is - * locked in THP migration. No race is possible. + * memcg = mem_cgroup_begin_page_stat(page, &locked, &flags); + * if (TestClearPageState(page)) + * mem_cgroup_update_page_stat(memcg, state, -1); + * mem_cgroup_end_page_stat(memcg, locked, flags); * - * Uncharge happens to pages with zero references, no race possible. + * The RCU lock is held throughout the transaction. The fast path can + * get away without acquiring the memcg->move_lock (@locked is false) + * because page moving starts with an RCU grace period. * - * Charge moving between groups is protected by checking mm->moving - * account and taking the move_lock in the slowpath. + * The RCU lock also protects the memcg from being freed when the page + * state that is going to change is the only thing preventing the page + * from being uncharged. E.g. end-writeback clearing PageWriteback(), + * which allows migration to go ahead and uncharge the page before the + * account transaction might be complete. 
*/ - -void __mem_cgroup_begin_update_page_stat(struct page *page, - bool *locked, unsigned long *flags) +struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, + bool *locked, + unsigned long *flags) { struct mem_cgroup *memcg; struct page_cgroup *pc; + rcu_read_lock(); + + if (mem_cgroup_disabled()) + return NULL; + pc = lookup_page_cgroup(page); again: memcg = pc->mem_cgroup; if (unlikely(!memcg || !PageCgroupUsed(pc))) - return; - /* - * If this memory cgroup is not under account moving, we don't - * need to take move_lock_mem_cgroup(). Because we already hold - * rcu_read_lock(), any calls to move_account will be delayed until - * rcu_read_unlock(). - */ - VM_BUG_ON(!rcu_read_lock_held()); + return NULL; + + *locked = false; if (atomic_read(&memcg->moving_account) <= 0) - return; + return memcg; move_lock_mem_cgroup(memcg, flags); if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) { @@ -2246,36 +2251,40 @@ again: goto again; } *locked = true; + + return memcg; } -void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags) +/** + * mem_cgroup_end_page_stat - finish a page state statistics transaction + * @memcg: the memcg that was accounted against + * @locked: value received from mem_cgroup_begin_page_stat() + * @flags: value received from mem_cgroup_begin_page_stat() + */ +void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool locked, + unsigned long flags) { - struct page_cgroup *pc = lookup_page_cgroup(page); + if (memcg && locked) + move_unlock_mem_cgroup(memcg, &flags); - /* - * It's guaranteed that pc->mem_cgroup never changes while - * lock is held because a routine modifies pc->mem_cgroup - * should take move_lock_mem_cgroup(). 
- */ - move_unlock_mem_cgroup(pc->mem_cgroup, flags); + rcu_read_unlock(); } -void mem_cgroup_update_page_stat(struct page *page, +/** + * mem_cgroup_update_page_stat - update page state statistics + * @memcg: memcg to account against + * @idx: page state item to account + * @val: number of pages (positive or negative) + * + * See mem_cgroup_begin_page_stat() for locking requirements. + */ +void mem_cgroup_update_page_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx, int val) { - struct mem_cgroup *memcg; - struct page_cgroup *pc = lookup_page_cgroup(page); - unsigned long uninitialized_var(flags); - - if (mem_cgroup_disabled()) - return; - VM_BUG_ON(!rcu_read_lock_held()); - memcg = pc->mem_cgroup; - if (unlikely(!memcg || !PageCgroupUsed(pc))) - return; - this_cpu_add(memcg->stat->count[idx], val); + if (memcg) + this_cpu_add(memcg->stat->count[idx], val); } /* diff --git a/mm/memory.c b/mm/memory.c index 1cc6bfb..3e50383 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1147,6 +1147,7 @@ again: print_bad_pte(vma, addr, ptent, page); if (unlikely(!__tlb_remove_page(tlb, page))) { force_flush = 1; + addr += PAGE_SIZE; break; } continue; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 29d8693..252e1db 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1912,7 +1912,6 @@ void try_offline_node(int nid) unsigned long start_pfn = pgdat->node_start_pfn; unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages; unsigned long pfn; - struct page *pgdat_page = virt_to_page(pgdat); int i; for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { @@ -1941,10 +1940,6 @@ void try_offline_node(int nid) node_set_offline(nid); unregister_one_node(nid); - if (!PageSlab(pgdat_page) && !PageCompound(pgdat_page)) - /* node data is allocated from boot memory */ - return; - /* free waittable in each zone */ for (i = 0; i < MAX_NR_ZONES; i++) { struct zone *zone = pgdat->node_zones + i; diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1080,7 +1080,7 @@ struct vm_area_struct 
*vma_merge(struct mm_struct *mm, end, prev->vm_pgoff, NULL); if (err) return NULL; - khugepaged_enter_vma_merge(prev); + khugepaged_enter_vma_merge(prev, vm_flags); return prev; } @@ -1099,7 +1099,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, next->vm_pgoff - pglen, NULL); if (err) return NULL; - khugepaged_enter_vma_merge(area); + khugepaged_enter_vma_merge(area, vm_flags); return area; } @@ -2208,7 +2208,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) } } vma_unlock_anon_vma(vma); - khugepaged_enter_vma_merge(vma); + khugepaged_enter_vma_merge(vma, vma->vm_flags); validate_mm(vma->vm_mm); return error; } @@ -2277,7 +2277,7 @@ int expand_downwards(struct vm_area_struct *vma, } } vma_unlock_anon_vma(vma); - khugepaged_enter_vma_merge(vma); + khugepaged_enter_vma_merge(vma, vma->vm_flags); validate_mm(vma->vm_mm); return error; } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index ff24c9d..19ceae8 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2116,23 +2116,6 @@ void account_page_dirtied(struct page *page, struct address_space *mapping) EXPORT_SYMBOL(account_page_dirtied); /* - * Helper function for set_page_writeback family. - * - * The caller must hold mem_cgroup_begin/end_update_page_stat() lock - * while calling this function. - * See test_set_page_writeback for example. - * - * NOTE: Unlike account_page_dirtied this does not rely on being atomic - * wrt interrupts. - */ -void account_page_writeback(struct page *page) -{ - mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); - inc_zone_page_state(page, NR_WRITEBACK); -} -EXPORT_SYMBOL(account_page_writeback); - -/* * For address_spaces which do not use buffers. Just tag the page as dirty in * its radix tree. 
* @@ -2344,11 +2327,12 @@ EXPORT_SYMBOL(clear_page_dirty_for_io); int test_clear_page_writeback(struct page *page) { struct address_space *mapping = page_mapping(page); - int ret; - bool locked; unsigned long memcg_flags; + struct mem_cgroup *memcg; + bool locked; + int ret; - mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags); + memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags); if (mapping) { struct backing_dev_info *bdi = mapping->backing_dev_info; unsigned long flags; @@ -2369,22 +2353,23 @@ int test_clear_page_writeback(struct page *page) ret = TestClearPageWriteback(page); } if (ret) { - mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); + mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK); dec_zone_page_state(page, NR_WRITEBACK); inc_zone_page_state(page, NR_WRITTEN); } - mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags); + mem_cgroup_end_page_stat(memcg, locked, memcg_flags); return ret; } int __test_set_page_writeback(struct page *page, bool keep_write) { struct address_space *mapping = page_mapping(page); - int ret; - bool locked; unsigned long memcg_flags; + struct mem_cgroup *memcg; + bool locked; + int ret; - mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags); + memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags); if (mapping) { struct backing_dev_info *bdi = mapping->backing_dev_info; unsigned long flags; @@ -2410,9 +2395,11 @@ int __test_set_page_writeback(struct page *page, bool keep_write) } else { ret = TestSetPageWriteback(page); } - if (!ret) - account_page_writeback(page); - mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags); + if (!ret) { + mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK); + inc_zone_page_state(page, NR_WRITEBACK); + } + mem_cgroup_end_page_stat(memcg, locked, memcg_flags); return ret; } diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 3708264..5331c2b 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -171,6 +171,7 @@ 
static void free_page_cgroup(void *addr) sizeof(struct page_cgroup) * PAGES_PER_SECTION; BUG_ON(PageReserved(page)); + kmemleak_free(addr); free_pages_exact(addr, table_size); } } diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1042,15 +1042,46 @@ void page_add_new_anon_rmap(struct page *page, */ void page_add_file_rmap(struct page *page) { - bool locked; + struct mem_cgroup *memcg; unsigned long flags; + bool locked; - mem_cgroup_begin_update_page_stat(page, &locked, &flags); + memcg = mem_cgroup_begin_page_stat(page, &locked, &flags); if (atomic_inc_and_test(&page->_mapcount)) { __inc_zone_page_state(page, NR_FILE_MAPPED); - mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); + mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED); } - mem_cgroup_end_update_page_stat(page, &locked, &flags); + mem_cgroup_end_page_stat(memcg, locked, flags); +} + +static void page_remove_file_rmap(struct page *page) +{ + struct mem_cgroup *memcg; + unsigned long flags; + bool locked; + + memcg = mem_cgroup_begin_page_stat(page, &locked, &flags); + + /* page still mapped by someone else? */ + if (!atomic_add_negative(-1, &page->_mapcount)) + goto out; + + /* Hugepages are not counted in NR_FILE_MAPPED for now. */ + if (unlikely(PageHuge(page))) + goto out; + + /* + * We use the irq-unsafe __{inc|mod}_zone_page_stat because + * these counters are not modified in interrupt context, and + * pte lock(a spinlock) is held, which implies preemption disabled. 
+ */ + __dec_zone_page_state(page, NR_FILE_MAPPED); + mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED); + + if (unlikely(PageMlocked(page))) + clear_page_mlock(page); +out: + mem_cgroup_end_page_stat(memcg, locked, flags); } /** @@ -1061,46 +1092,33 @@ void page_add_file_rmap(struct page *page) */ void page_remove_rmap(struct page *page) { - bool anon = PageAnon(page); - bool locked; - unsigned long flags; - - /* - * The anon case has no mem_cgroup page_stat to update; but may - * uncharge_page() below, where the lock ordering can deadlock if - * we hold the lock against page_stat move: so avoid it on anon. - */ - if (!anon) - mem_cgroup_begin_update_page_stat(page, &locked, &flags); + if (!PageAnon(page)) { + page_remove_file_rmap(page); + return; + } /* page still mapped by someone else? */ if (!atomic_add_negative(-1, &page->_mapcount)) - goto out; + return; + + /* Hugepages are not counted in NR_ANON_PAGES for now. */ + if (unlikely(PageHuge(page))) + return; /* - * Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED - * and not charged by memcg for now. - * * We use the irq-unsafe __{inc|mod}_zone_page_stat because * these counters are not modified in interrupt context, and - * these counters are not modified in interrupt context, and * pte lock(a spinlock) is held, which implies preemption disabled. 
*/ - if (unlikely(PageHuge(page))) - goto out; - if (anon) { - if (PageTransHuge(page)) - __dec_zone_page_state(page, - NR_ANON_TRANSPARENT_HUGEPAGES); - __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, - -hpage_nr_pages(page)); - } else { - __dec_zone_page_state(page, NR_FILE_MAPPED); - mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); - mem_cgroup_end_update_page_stat(page, &locked, &flags); - } + if (PageTransHuge(page)) + __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); + + __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, + -hpage_nr_pages(page)); + if (unlikely(PageMlocked(page))) clear_page_mlock(page); + /* * It would be tidy to reset the PageAnon mapping here, * but that might overwrite a racing page_add_anon_rmap @@ -1110,10 +1128,6 @@ void page_remove_rmap(struct page *page) * Leaving it set also helps swapoff to reinstate ptes * faster for those pages still in swapcache. */ - return; -out: - if (!anon) - mem_cgroup_end_update_page_stat(page, &locked, &flags); } /* diff --git a/mm/slab_common.c b/mm/slab_common.c index 3a6e0cf..4069442 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -93,16 +93,6 @@ static int kmem_cache_sanity_check(const char *name, size_t size) s->object_size); continue; } - -#if !defined(CONFIG_SLUB) - if (!strcmp(s->name, name)) { - pr_err("%s (%s): Cache name already exists.\n", - __func__, name); - dump_stack(); - s = NULL; - return -EINVAL; - } -#endif } WARN_ON(strchr(name, ' ')); /* It confuses parsers */ |