Diffstat (limited to 'mm')
-rw-r--r--  mm/huge_memory.c      8
-rw-r--r--  mm/internal.h         5
-rw-r--r--  mm/list_lru.c        14
-rw-r--r--  mm/madvise.c         11
-rw-r--r--  mm/memblock.c        38
-rw-r--r--  mm/memcontrol.c       7
-rw-r--r--  mm/memory.c          23
-rw-r--r--  mm/memory_hotplug.c   5
-rw-r--r--  mm/mempolicy.c        5
-rw-r--r--  mm/migrate.c         11
-rw-r--r--  mm/mmap.c             2
-rw-r--r--  mm/mprotect.c         1
-rw-r--r--  mm/mremap.c           1
-rw-r--r--  mm/nobootmem.c       16
-rw-r--r--  mm/oom_kill.c        16
-rw-r--r--  mm/page_alloc.c      40
-rw-r--r--  mm/rmap.c            36
-rw-r--r--  mm/shmem.c           16
-rw-r--r--  mm/slab_common.c      5
-rw-r--r--  mm/sparse.c           2
-rw-r--r--  mm/swap_cgroup.c      2
-rw-r--r--  mm/vmalloc.c         14
22 files changed, 186 insertions, 92 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d5b2b75..8258e9e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1227,8 +1227,11 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
*/
if (unlikely(pmd_trans_migrating(*fe->pmd))) {
page = pmd_page(*fe->pmd);
+ if (!get_page_unless_zero(page))
+ goto out_unlock;
spin_unlock(fe->ptl);
wait_on_page_locked(page);
+ put_page(page);
goto out;
}
@@ -1260,8 +1263,11 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
/* Migration could have started since the pmd_trans_migrating check */
if (!page_locked) {
+ if (!get_page_unless_zero(page))
+ goto out_unlock;
spin_unlock(fe->ptl);
wait_on_page_locked(page);
+ put_page(page);
page_nid = -1;
goto out;
}
@@ -1367,8 +1373,8 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
get_page(page);
spin_unlock(ptl);
split_huge_page(page);
- put_page(page);
unlock_page(page);
+ put_page(page);
goto out_unlocked;
}
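Both do_huge_pmd_numa_page() hunks above apply the same fix: pin the THP with get_page_unless_zero() before dropping the PTL and sleeping in wait_on_page_locked(), so the page cannot be freed and reused while the fault path waits for migration; the madvise_free_huge_pmd() hunk additionally drops the final reference only after unlock_page(), so put_page() never frees a still-locked page. A sketch of the pin-before-wait pattern, assembled from the patched lines above with explanatory comments:

    if (unlikely(pmd_trans_migrating(*fe->pmd))) {
        page = pmd_page(*fe->pmd);
        if (!get_page_unless_zero(page))   /* refcount already zero: page is being freed */
            goto out_unlock;               /* retry the fault rather than wait */
        spin_unlock(fe->ptl);              /* safe to sleep only after taking the pin */
        wait_on_page_locked(page);         /* migration unlocks the page when it finishes */
        put_page(page);                    /* drop the temporary reference */
        goto out;
    }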
diff --git a/mm/internal.h b/mm/internal.h
index 537ac99..34a5459 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -472,6 +472,7 @@ struct tlbflush_unmap_batch;
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
void try_to_unmap_flush(void);
void try_to_unmap_flush_dirty(void);
+void flush_tlb_batched_pending(struct mm_struct *mm);
#else
static inline void try_to_unmap_flush(void)
{
@@ -479,7 +480,9 @@ static inline void try_to_unmap_flush(void)
static inline void try_to_unmap_flush_dirty(void)
{
}
-
+static inline void flush_tlb_batched_pending(struct mm_struct *mm)
+{
+}
#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
extern const struct trace_print_flags pageflag_names[];
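flush_tlb_batched_pending() is declared here and defined in the mm/rmap.c hunk further down; the stub keeps callers unconditional when CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH is not set. The madvise.c, memory.c, mprotect.c and mremap.c hunks in this diff all invoke it at the same point. A sketch of that call-site pattern (a condensed, hypothetical caller, not one of the patched functions):

    pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
    flush_tlb_batched_pending(mm);     /* flush any TLB batch reclaim left pending for this mm */
    arch_enter_lazy_mmu_mode();
    /* ... read and rewrite PTEs ... */
    arch_leave_lazy_mmu_mode();
    pte_unmap_unlock(pte, ptl);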
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 234676e..7a40fa2 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -117,6 +117,7 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item)
l = list_lru_from_kmem(nlru, item);
list_add_tail(item, &l->list);
l->nr_items++;
+ nlru->nr_items++;
spin_unlock(&nlru->lock);
return true;
}
@@ -136,6 +137,7 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item)
l = list_lru_from_kmem(nlru, item);
list_del_init(item);
l->nr_items--;
+ nlru->nr_items--;
spin_unlock(&nlru->lock);
return true;
}
@@ -183,15 +185,10 @@ EXPORT_SYMBOL_GPL(list_lru_count_one);
unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
- long count = 0;
- int memcg_idx;
+ struct list_lru_node *nlru;
- count += __list_lru_count_one(lru, nid, -1);
- if (list_lru_memcg_aware(lru)) {
- for_each_memcg_cache_index(memcg_idx)
- count += __list_lru_count_one(lru, nid, memcg_idx);
- }
- return count;
+ nlru = &lru->node[nid];
+ return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);
@@ -226,6 +223,7 @@ restart:
assert_spin_locked(&nlru->lock);
case LRU_REMOVED:
isolated++;
+ nlru->nr_items--;
/*
* If the lru lock has been dropped, our list
* traversal is now invalid and so we have to
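The new nlru->nr_items counter is bumped everywhere a per-memcg list length changes (list_lru_add(), list_lru_del(), and the LRU_REMOVED case of the walk callback), which lets list_lru_count_node() stop iterating every memcg cache index. The invariant the hunks above maintain, restated as a sketch:

    /*
     * nlru->nr_items == sum of l->nr_items over all per-memcg lists of this
     * node, so the node-wide count becomes O(1) instead of O(#memcg caches).
     */
    unsigned long list_lru_count_node(struct list_lru *lru, int nid)
    {
        return lru->node[nid].nr_items;
    }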
diff --git a/mm/madvise.c b/mm/madvise.c
index 93fb63e..55f30ec 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -21,6 +21,7 @@
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
+#include "internal.h"
#include <asm/tlb.h>
@@ -282,6 +283,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
return 0;
orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
for (; addr != end; pte++, addr += PAGE_SIZE) {
ptent = *pte;
@@ -329,8 +331,8 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
pte_offset_map_lock(mm, pmd, addr, &ptl);
goto out;
}
- put_page(page);
unlock_page(page);
+ put_page(page);
pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
pte--;
addr -= PAGE_SIZE;
@@ -531,6 +533,8 @@ static long madvise_remove(struct vm_area_struct *vma,
static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
{
struct page *p;
+ struct zone *zone;
+
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
for (; start < end; start += PAGE_SIZE <<
@@ -559,6 +563,11 @@ static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
if (ret)
return ret;
}
+
+ /* Ensure that all poisoned pages are removed from per-cpu lists */
+ for_each_populated_zone(zone)
+ drain_all_pages(zone);
+
return 0;
}
#endif
diff --git a/mm/memblock.c b/mm/memblock.c
index 9f6be74..3740af5 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -297,31 +297,27 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
}
#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
-
-phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info(
- phys_addr_t *addr)
-{
- if (memblock.reserved.regions == memblock_reserved_init_regions)
- return 0;
-
- *addr = __pa(memblock.reserved.regions);
-
- return PAGE_ALIGN(sizeof(struct memblock_region) *
- memblock.reserved.max);
-}
-
-phys_addr_t __init_memblock get_allocated_memblock_memory_regions_info(
- phys_addr_t *addr)
+/**
+ * Discard memory and reserved arrays if they were allocated
+ */
+void __init memblock_discard(void)
{
- if (memblock.memory.regions == memblock_memory_init_regions)
- return 0;
+ phys_addr_t addr, size;
- *addr = __pa(memblock.memory.regions);
+ if (memblock.reserved.regions != memblock_reserved_init_regions) {
+ addr = __pa(memblock.reserved.regions);
+ size = PAGE_ALIGN(sizeof(struct memblock_region) *
+ memblock.reserved.max);
+ __memblock_free_late(addr, size);
+ }
- return PAGE_ALIGN(sizeof(struct memblock_region) *
- memblock.memory.max);
+ if (memblock.memory.regions != memblock_memory_init_regions) {
+ addr = __pa(memblock.memory.regions);
+ size = PAGE_ALIGN(sizeof(struct memblock_region) *
+ memblock.memory.max);
+ __memblock_free_late(addr, size);
+ }
}
-
#endif
/**
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 47559cc..2a800c4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -462,6 +462,8 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
struct mem_cgroup_tree_per_node *mctz;
mctz = soft_limit_tree_from_page(page);
+ if (!mctz)
+ return;
/*
* Necessary to update all ancestors when hierarchy is used.
* because their event counter is not touched.
@@ -499,7 +501,8 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
for_each_node(nid) {
mz = mem_cgroup_nodeinfo(memcg, nid);
mctz = soft_limit_tree_node(nid);
- mem_cgroup_remove_exceeded(mz, mctz);
+ if (mctz)
+ mem_cgroup_remove_exceeded(mz, mctz);
}
}
@@ -2565,7 +2568,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
* is empty. Do it lockless to prevent lock bouncing. Races
* are acceptable as soft limit is best effort anyway.
*/
- if (RB_EMPTY_ROOT(&mctz->rb_root))
+ if (!mctz || RB_EMPTY_ROOT(&mctz->rb_root))
return 0;
/*
diff --git a/mm/memory.c b/mm/memory.c
index e6a5a1f..1aa63e7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1124,6 +1124,7 @@ again:
init_rss_vec(rss);
start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
pte = start_pte;
+ flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
do {
pte_t ptent = *pte;
@@ -3595,6 +3596,11 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
/* do counter updates before entering really critical section. */
check_sync_rss_stat(current);
+ if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
+ flags & FAULT_FLAG_INSTRUCTION,
+ flags & FAULT_FLAG_REMOTE))
+ return VM_FAULT_SIGSEGV;
+
/*
* Enable the memcg OOM handling for faults triggered in user
* space. Kernel faults are handled more gracefully.
@@ -3602,11 +3608,6 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
if (flags & FAULT_FLAG_USER)
mem_cgroup_oom_enable();
- if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
- flags & FAULT_FLAG_INSTRUCTION,
- flags & FAULT_FLAG_REMOTE))
- return VM_FAULT_SIGSEGV;
-
if (unlikely(is_vm_hugetlb_page(vma)))
ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
else
@@ -3634,8 +3635,18 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
* further.
*/
if (unlikely((current->flags & PF_KTHREAD) && !(ret & VM_FAULT_ERROR)
- && test_bit(MMF_UNSTABLE, &vma->vm_mm->flags)))
+ && test_bit(MMF_UNSTABLE, &vma->vm_mm->flags))) {
+
+ /*
+ * We are going to enforce SIGBUS but the PF path might have
+ * dropped the mmap_sem already so take it again so that
+ * we do not break expectations of all arch specific PF paths
+ * and g-u-p
+ */
+ if (ret & VM_FAULT_RETRY)
+ down_read(&vma->vm_mm->mmap_sem);
ret = VM_FAULT_SIGBUS;
+ }
return ret;
}
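Two independent fixes meet in this file. Hoisting the arch_vma_access_permitted() check above mem_cgroup_oom_enable() keeps the enable/disable pair balanced: returning VM_FAULT_SIGSEGV after the enable would have left memcg OOM handling switched on for the task. And when a kernel thread operating on an MMF_UNSTABLE mm gets VM_FAULT_RETRY (the fault path has already dropped mmap_sem), the semaphore is re-taken before the forced SIGBUS so callers that expect it to still be held (arch fault handlers, get_user_pages) are not broken. A condensed sketch of the balanced-pair requirement; the helper name and shortened flag variables are placeholders, and the matching mem_cgroup_oom_disable() sits outside the quoted hunks:

    if (!arch_vma_access_permitted(vma, write, exec, remote))
        return VM_FAULT_SIGSEGV;              /* bail out before enabling memcg OOM */

    if (flags & FAULT_FLAG_USER)
        mem_cgroup_oom_enable();              /* must be paired ... */
    ret = __do_fault_work(vma, address, flags);  /* placeholder for the real fault handling */
    if (flags & FAULT_FLAG_USER)
        mem_cgroup_oom_disable();             /* ... with this on every return path */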
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index ede13734..c9f715b 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -179,7 +179,7 @@ static void release_memory_resource(struct resource *res)
void get_page_bootmem(unsigned long info, struct page *page,
unsigned long type)
{
- page->lru.next = (struct list_head *) type;
+ page->freelist = (void *)type;
SetPagePrivate(page);
set_page_private(page, info);
page_ref_inc(page);
@@ -189,11 +189,12 @@ void put_page_bootmem(struct page *page)
{
unsigned long type;
- type = (unsigned long) page->lru.next;
+ type = (unsigned long) page->freelist;
BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
if (page_ref_dec_return(page) == 1) {
+ page->freelist = NULL;
ClearPagePrivate(page);
set_page_private(page, 0);
INIT_LIST_HEAD(&page->lru);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 23471526..a8ab5e7 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -926,11 +926,6 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
*policy |= (pol->flags & MPOL_MODE_FLAGS);
}
- if (vma) {
- up_read(&current->mm->mmap_sem);
- vma = NULL;
- }
-
err = 0;
if (nmask) {
if (mpol_store_user_nodemask(pol)) {
diff --git a/mm/migrate.c b/mm/migrate.c
index 6850f62..821623f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -40,6 +40,7 @@
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/page_owner.h>
+#include <linux/ptrace.h>
#include <asm/tlbflush.h>
@@ -1663,7 +1664,6 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
const int __user *, nodes,
int __user *, status, int, flags)
{
- const struct cred *cred = current_cred(), *tcred;
struct task_struct *task;
struct mm_struct *mm;
int err;
@@ -1687,14 +1687,9 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
/*
* Check if this process has the right to modify the specified
- * process. The right exists if the process has administrative
- * capabilities, superuser privileges or the same
- * userid as the target process.
+ * process. Use the regular "ptrace_may_access()" checks.
*/
- tcred = __task_cred(task);
- if (!uid_eq(cred->euid, tcred->suid) && !uid_eq(cred->euid, tcred->uid) &&
- !uid_eq(cred->uid, tcred->suid) && !uid_eq(cred->uid, tcred->uid) &&
- !capable(CAP_SYS_NICE)) {
+ if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
rcu_read_unlock();
err = -EPERM;
goto out;
diff --git a/mm/mmap.c b/mm/mmap.c
index 145d3d5..75d263b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2228,7 +2228,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
/* Guard against exceeding limits of the address space. */
address &= PAGE_MASK;
- if (address >= TASK_SIZE)
+ if (address >= (TASK_SIZE & PAGE_MASK))
return -ENOMEM;
address += PAGE_SIZE;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 1193652..ae740c9 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -74,6 +74,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
if (!pte)
return 0;
+ flush_tlb_batched_pending(vma->vm_mm);
arch_enter_lazy_mmu_mode();
do {
oldpte = *pte;
diff --git a/mm/mremap.c b/mm/mremap.c
index 30d7d24..1597671 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -142,6 +142,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
new_ptl = pte_lockptr(mm, new_pmd);
if (new_ptl != old_ptl)
spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+ flush_tlb_batched_pending(vma->vm_mm);
arch_enter_lazy_mmu_mode();
for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 487dad6..ab99812 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -146,22 +146,6 @@ static unsigned long __init free_low_memory_core_early(void)
NULL)
count += __free_memory_core(start, end);
-#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
- {
- phys_addr_t size;
-
- /* Free memblock.reserved array if it was allocated */
- size = get_allocated_memblock_reserved_regions_info(&start);
- if (size)
- count += __free_memory_core(start, start + size);
-
- /* Free memblock.memory array if it was allocated */
- size = get_allocated_memblock_memory_regions_info(&start);
- if (size)
- count += __free_memory_core(start, start + size);
- }
-#endif
-
return count;
}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index ec9f11d..d631d25 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -37,6 +37,7 @@
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <linux/init.h>
+#include <linux/mmu_notifier.h>
#include <asm/tlb.h>
#include "internal.h"
@@ -491,6 +492,21 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
}
/*
+ * If the mm has notifiers then we would need to invalidate them around
+ * unmap_page_range and that is risky because notifiers can sleep and
+ * what they do is basically nondeterministic. So let's have a short
+ * sleep to give the oom victim some more time.
+ * TODO: we really want to get rid of this ugly hack and make sure that
+ * notifiers cannot block for an unbounded amount of time and add
+ * mmu_notifier_invalidate_range_{start,end} around unmap_page_range
+ */
+ if (mm_has_notifiers(mm)) {
+ up_read(&mm->mmap_sem);
+ schedule_timeout_idle(HZ);
+ goto unlock_oom;
+ }
+
+ /*
* increase mm_users only after we know we will reap something so
* that the mmput_async is called only when we have reaped something
* and delayed __mmput doesn't matter that much
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 56df8c2..7064aae 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1587,6 +1587,10 @@ void __init page_alloc_init_late(void)
/* Reinit limits that are based on free pages after the kernel is up */
files_maxfiles_init();
#endif
+#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
+ /* Discard memblock private memory */
+ memblock_discard();
+#endif
for_each_populated_zone(zone)
set_zone_contiguous(zone);
@@ -1875,14 +1879,14 @@ int move_freepages(struct zone *zone,
#endif
for (page = start_page; page <= end_page;) {
- /* Make sure we are not inadvertently changing nodes */
- VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
-
if (!pfn_valid_within(page_to_pfn(page))) {
page++;
continue;
}
+ /* Make sure we are not inadvertently changing nodes */
+ VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
+
if (!PageBuddy(page)) {
page++;
continue;
@@ -2096,13 +2100,25 @@ static void unreserve_highatomic_pageblock(const struct alloc_context *ac)
continue;
/*
- * It should never happen but changes to locking could
- * inadvertently allow a per-cpu drain to add pages
- * to MIGRATE_HIGHATOMIC while unreserving so be safe
- * and watch for underflows.
+ * In the page freeing path, migratetype changes are racy, so
+ * we can encounter several free pages from a pageblock in
+ * this loop although we changed the pageblock type from
+ * highatomic to ac->migratetype. So we should adjust the
+ * count only once.
*/
- zone->nr_reserved_highatomic -= min(pageblock_nr_pages,
- zone->nr_reserved_highatomic);
+ if (get_pageblock_migratetype(page) ==
+ MIGRATE_HIGHATOMIC) {
+ /*
+ * It should never happen but changes to
+ * locking could inadvertently allow a per-cpu
+ * drain to add pages to MIGRATE_HIGHATOMIC
+ * while unreserving so be safe and watch for
+ * underflows.
+ */
+ zone->nr_reserved_highatomic -= min(
+ pageblock_nr_pages,
+ zone->nr_reserved_highatomic);
+ }
/*
* Convert to ac->migratetype and avoid the normal
@@ -6445,8 +6461,8 @@ unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
}
if (pages && s)
- pr_info("Freeing %s memory: %ldK (%p - %p)\n",
- s, pages << (PAGE_SHIFT - 10), start, end);
+ pr_info("Freeing %s memory: %ldK\n",
+ s, pages << (PAGE_SHIFT - 10));
return pages;
}
@@ -7335,7 +7351,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
/* Make sure the range is really isolated. */
if (test_pages_isolated(outer_start, end, false)) {
- pr_info("%s: [%lx, %lx) PFNs busy\n",
+ pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n",
__func__, outer_start, end);
ret = -EBUSY;
goto done;
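In unreserve_highatomic_pageblock(), the reserve count used to be decremented for every free page found, but a racing free can leave several pages of the same pageblock on the list even after its migratetype was changed, so the underflow-safe adjustment is now made only while the pageblock still reads MIGRATE_HIGHATOMIC, i.e. once per pageblock. A condensed sketch; the conversion that follows is outside the quoted hunk and paraphrased here:

    if (get_pageblock_migratetype(page) == MIGRATE_HIGHATOMIC) {
        /* adjust once per pageblock, guarding against underflow */
        zone->nr_reserved_highatomic -= min(pageblock_nr_pages,
                                            zone->nr_reserved_highatomic);
    }
    /* ...then convert the whole pageblock to ac->migratetype as before... */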
diff --git a/mm/rmap.c b/mm/rmap.c
index cd37c1c..94488b0 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -617,6 +617,13 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
tlb_ubc->flush_required = true;
/*
+ * Ensure compiler does not re-order the setting of tlb_flush_batched
+ * before the PTE is cleared.
+ */
+ barrier();
+ mm->tlb_flush_batched = true;
+
+ /*
* If the PTE was dirty then it's best to assume it's writable. The
* caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
* before the page is queued for IO.
@@ -643,6 +650,35 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
return should_defer;
}
+
+/*
+ * Reclaim unmaps pages under the PTL but do not flush the TLB prior to
+ * releasing the PTL if TLB flushes are batched. It's possible for a parallel
+ * operation such as mprotect or munmap to race between reclaim unmapping
+ * the page and flushing the page. If this race occurs, it potentially allows
+ * access to data via a stale TLB entry. Tracking all mm's that have TLB
+ * batching in flight would be expensive during reclaim so instead track
+ * whether TLB batching occurred in the past and if so then do a flush here
+ * if required. This will cost one additional flush per reclaim cycle paid
+ * by the first operation at risk such as mprotect and munmap.
+ *
+ * This must be called under the PTL so that an access to tlb_flush_batched
+ * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise
+ * via the PTL.
+ */
+void flush_tlb_batched_pending(struct mm_struct *mm)
+{
+ if (mm->tlb_flush_batched) {
+ flush_tlb_mm(mm);
+
+ /*
+ * Do not allow the compiler to re-order the clearing of
+ * tlb_flush_batched before the tlb is flushed.
+ */
+ barrier();
+ mm->tlb_flush_batched = false;
+ }
+}
#else
static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
struct page *page, bool writable)
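The race closed here: reclaim clears PTEs under the PTL but batches the TLB flush for later, so an mprotect/munmap/madvise running in between could keep satisfying accesses through a stale (possibly writable) TLB entry. set_tlb_ubc_flush_pending() now records the pending batch in mm->tlb_flush_batched, and flush_tlb_batched_pending(), called under the PTL by the earlier hunks, converts that into an explicit flush_tlb_mm() before the PTEs are touched again. A timeline sketch of the interleaving being fixed:

    /*
     *   reclaim (CPU 0)                        mprotect/munmap (CPU 1)
     *   ------------------------------------   ------------------------------------
     *   lock PTL, clear PTE
     *   set_tlb_ubc_flush_pending(mm, ...)
     *     -> mm->tlb_flush_batched = true
     *   unlock PTL (TLB flush still pending)
     *                                          lock PTL
     *                                          flush_tlb_batched_pending(mm)
     *                                            -> flush_tlb_mm(mm), clear the flag
     *                                          modify PTEs with no stale entries left
     */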
diff --git a/mm/shmem.c b/mm/shmem.c
index d99cfb6..004e0f87 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1007,7 +1007,11 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
*/
if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) {
spin_lock(&sbinfo->shrinklist_lock);
- if (list_empty(&info->shrinklist)) {
+ /*
+ * _careful to defend against unlocked access to
+ * ->shrink_list in shmem_unused_huge_shrink()
+ */
+ if (list_empty_careful(&info->shrinklist)) {
list_add_tail(&info->shrinklist,
&sbinfo->shrinklist);
sbinfo->shrinklist_len++;
@@ -1774,7 +1778,11 @@ alloc_nohuge: page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
* to shrink under memory pressure.
*/
spin_lock(&sbinfo->shrinklist_lock);
- if (list_empty(&info->shrinklist)) {
+ /*
+ * _careful to defend against unlocked access to
+ * ->shrink_list in shmem_unused_huge_shrink()
+ */
+ if (list_empty_careful(&info->shrinklist)) {
list_add_tail(&info->shrinklist,
&sbinfo->shrinklist);
sbinfo->shrinklist_len++;
@@ -3802,7 +3810,7 @@ int __init shmem_init(void)
}
#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
- if (has_transparent_hugepage() && shmem_huge < SHMEM_HUGE_DENY)
+ if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY)
SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
else
shmem_huge = 0; /* just in case it was patched */
@@ -3863,7 +3871,7 @@ static ssize_t shmem_enabled_store(struct kobject *kobj,
return -EINVAL;
shmem_huge = huge;
- if (shmem_huge < SHMEM_HUGE_DENY)
+ if (shmem_huge > SHMEM_HUGE_DENY)
SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
return count;
}
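The shmem_huge comparisons were inverted: SHMEM_HUGE_DENY and SHMEM_HUGE_FORCE are the special negative values (assumed here to be -1 and -2, as defined in mm/shmem.c of this kernel generation), while the real policies NEVER/ALWAYS/WITHIN_SIZE/ADVISE are 0..3, so "shmem_huge < SHMEM_HUGE_DENY" matched only FORCE. The corrected "shmem_huge > SHMEM_HUGE_DENY" propagates the ordinary policies to the internal mount and leaves DENY/FORCE alone. A sketch with the assumed constant values:

    #define SHMEM_HUGE_NEVER        0     /* ordinary policies ... */
    #define SHMEM_HUGE_ALWAYS       1
    #define SHMEM_HUGE_WITHIN_SIZE  2
    #define SHMEM_HUGE_ADVISE       3
    #define SHMEM_HUGE_DENY       (-1)    /* special, testing-only values */
    #define SHMEM_HUGE_FORCE      (-2)

    if (shmem_huge > SHMEM_HUGE_DENY)     /* i.e. one of the 0..3 policies */
        SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;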
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 5d2f24f..622f6b6 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -255,7 +255,7 @@ struct kmem_cache *find_mergeable(size_t size, size_t align,
{
struct kmem_cache *s;
- if (slab_nomerge || (flags & SLAB_NEVER_MERGE))
+ if (slab_nomerge)
return NULL;
if (ctor)
@@ -266,6 +266,9 @@ struct kmem_cache *find_mergeable(size_t size, size_t align,
size = ALIGN(size, align);
flags = kmem_cache_flags(size, flags, name, NULL);
+ if (flags & SLAB_NEVER_MERGE)
+ return NULL;
+
list_for_each_entry_reverse(s, &slab_caches, list) {
if (slab_unmergeable(s))
continue;
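find_mergeable() previously tested SLAB_NEVER_MERGE on the caller-supplied flags before kmem_cache_flags() had folded in debug options (for example those implied by the slub_debug= boot parameter), so caches made unmergeable only by debugging could still be merged. Moving the test after that call closes the gap; the corrected ordering, restated:

    flags = kmem_cache_flags(size, flags, name, NULL);  /* may add debug-implied flags */
    if (flags & SLAB_NEVER_MERGE)                       /* now sees those flags as well */
        return NULL;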
diff --git a/mm/sparse.c b/mm/sparse.c
index 1e168bf..8c4c82e 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -662,7 +662,7 @@ static void free_map_bootmem(struct page *memmap)
>> PAGE_SHIFT;
for (i = 0; i < nr_pages; i++, page++) {
- magic = (unsigned long) page->lru.next;
+ magic = (unsigned long) page->freelist;
BUG_ON(magic == NODE_INFO);
diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
index 454d6d7..3405b4e 100644
--- a/mm/swap_cgroup.c
+++ b/mm/swap_cgroup.c
@@ -204,6 +204,8 @@ void swap_cgroup_swapoff(int type)
struct page *page = map[i];
if (page)
__free_page(page);
+ if (!(i % SWAP_CLUSTER_MAX))
+ cond_resched();
}
vfree(map);
}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f2481cb..195de42 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -244,11 +244,21 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
*/
VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
+ /*
+ * Don't dereference bad PUD or PMD (below) entries. This will also
+ * identify huge mappings, which we may encounter on architectures
+ * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be
+ * identified as vmalloc addresses by is_vmalloc_addr(), but are
+ * not [unambiguously] associated with a struct page, so there is
+ * no correct value to return for them.
+ */
if (!pgd_none(*pgd)) {
pud_t *pud = pud_offset(pgd, addr);
- if (!pud_none(*pud)) {
+ WARN_ON_ONCE(pud_bad(*pud));
+ if (!pud_none(*pud) && !pud_bad(*pud)) {
pmd_t *pmd = pmd_offset(pud, addr);
- if (!pmd_none(*pmd)) {
+ WARN_ON_ONCE(pmd_bad(*pmd));
+ if (!pmd_none(*pmd) && !pmd_bad(*pmd)) {
pte_t *ptep, pte;
ptep = pte_offset_map(pmd, addr);