Diffstat (limited to 'mm/memory.c')
-rw-r--r--  mm/memory.c  178
1 file changed, 108 insertions(+), 70 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 5d9025f..d176154 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -69,8 +69,8 @@
#include "internal.h"
-#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
-#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
+#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
+#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_nid.
#endif
#ifndef CONFIG_NEED_MULTIPLE_NODES
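This hunk tracks the revert of the last_cpupid scheme back to plain last-NID tracking. The #warning fires on configurations where the last-NID hint cannot be packed into page->flags next to the node, zone and sparsemem section bits, in which case every struct page has to grow by one field. A minimal sketch of the fallback, after the mm_types.h of this era (illustrative, heavily trimmed):

/* When the NID hint fits, it lives in the high bits of page->flags;
 * otherwise LAST_NID_NOT_IN_PAGE_FLAGS is defined and struct page
 * carries a dedicated field, enlarging every page-frame descriptor.
 */
struct page {
	unsigned long flags;	/* zone, node, section ... maybe last_nid */
	/* ... */
#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
	int _last_nid;
#endif
};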
@@ -382,7 +382,7 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
pgtable_t token = pmd_pgtable(*pmd);
pmd_clear(pmd);
pte_free_tlb(tlb, token, addr);
- atomic_long_dec(&tlb->mm->nr_ptes);
+ tlb->mm->nr_ptes--;
}
static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
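Reverting atomic_long_dec() to a plain nr_ptes-- is safe here because, without split page-table locks, mm->nr_ptes is only updated under mm->page_table_lock (or with no other users of the mm left, as in teardown). A sketch of the two counting disciplines (illustrative):

/* Single per-mm lock: a bare counter is race-free. */
spin_lock(&mm->page_table_lock);
mm->nr_ptes++;
spin_unlock(&mm->page_table_lock);

/* Split per-table locks: two threads may populate different
 * PMDs concurrently, so the counter itself must be atomic. */
atomic_long_inc(&mm->nr_ptes);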
@@ -453,6 +453,8 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
/*
* This function frees user-level page tables of a process.
+ *
+ * Must be called with pagetable lock held.
*/
void free_pgd_range(struct mmu_gather *tlb,
unsigned long addr, unsigned long end,
@@ -550,7 +552,6 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
pmd_t *pmd, unsigned long address)
{
- spinlock_t *ptl;
pgtable_t new = pte_alloc_one(mm, address);
int wait_split_huge_page;
if (!new)
@@ -571,15 +572,15 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
*/
smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
- ptl = pmd_lock(mm, pmd);
+ spin_lock(&mm->page_table_lock);
wait_split_huge_page = 0;
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
- atomic_long_inc(&mm->nr_ptes);
+ mm->nr_ptes++;
pmd_populate(mm, pmd, new);
new = NULL;
} else if (unlikely(pmd_trans_splitting(*pmd)))
wait_split_huge_page = 1;
- spin_unlock(ptl);
+ spin_unlock(&mm->page_table_lock);
if (new)
pte_free(mm, new);
if (wait_split_huge_page)
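Both __pte_alloc() hunks revert the split PMD lock to the single mm->page_table_lock. pmd_lock() returned the spinlock guarding only the page-table page that holds this pmd; after the revert, every PMD population in the mm serializes on one lock, which is simpler but contends more on large multi-threaded processes. The two idioms side by side (sketch):

/* Split PMD locks (being reverted): lock just this table page. */
spinlock_t *ptl = pmd_lock(mm, pmd);
if (pmd_none(*pmd))
	pmd_populate(mm, pmd, new);
spin_unlock(ptl);

/* Single lock (being restored): one spinlock per mm. */
spin_lock(&mm->page_table_lock);
if (pmd_none(*pmd))
	pmd_populate(mm, pmd, new);
spin_unlock(&mm->page_table_lock);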
@@ -680,7 +681,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
if (vma->vm_ops)
printk(KERN_ALERT "vma->vm_ops->fault: %pSR\n",
vma->vm_ops->fault);
- if (vma->vm_file)
+ if (vma->vm_file && vma->vm_file->f_op)
printk(KERN_ALERT "vma->vm_file->f_op->mmap: %pSR\n",
vma->vm_file->f_op->mmap);
dump_stack();
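print_bad_pte() gains a NULL check on vma->vm_file->f_op before the %pSR print dereferences it; a file-backed VMA whose file carries no operations table would otherwise oops inside the error report itself. The defensive shape, in isolation (sketch):

const struct file_operations *fop =
	(vma->vm_file && vma->vm_file->f_op) ? vma->vm_file->f_op : NULL;
if (fop)
	printk(KERN_ALERT "vma->vm_file->f_op->mmap: %pSR\n", fop->mmap);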
@@ -1517,20 +1518,20 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
split_huge_page_pmd(vma, address, pmd);
goto split_fallthrough;
}
- ptl = pmd_lock(mm, pmd);
+ spin_lock(&mm->page_table_lock);
if (likely(pmd_trans_huge(*pmd))) {
if (unlikely(pmd_trans_splitting(*pmd))) {
- spin_unlock(ptl);
+ spin_unlock(&mm->page_table_lock);
wait_split_huge_page(vma->anon_vma, pmd);
} else {
page = follow_trans_huge_pmd(vma, address,
pmd, flags);
- spin_unlock(ptl);
+ spin_unlock(&mm->page_table_lock);
*page_mask = HPAGE_PMD_NR - 1;
goto out;
}
} else
- spin_unlock(ptl);
+ spin_unlock(&mm->page_table_lock);
/* fall through */
}
split_fallthrough:
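The same single-lock revert, applied to the THP branch of follow_page_mask(). Note the ordering both versions preserve: the page-table lock is dropped before wait_split_huge_page(), which may sleep. Condensed shape (sketch):

spin_lock(&mm->page_table_lock);
if (pmd_trans_huge(*pmd)) {
	if (pmd_trans_splitting(*pmd)) {
		spin_unlock(&mm->page_table_lock); /* don't sleep with it held */
		wait_split_huge_page(vma->anon_vma, pmd);
	} else {
		page = follow_trans_huge_pmd(vma, address, pmd, flags);
		spin_unlock(&mm->page_table_lock);
		goto out;
	}
} else
	spin_unlock(&mm->page_table_lock);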
@@ -2720,14 +2721,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
get_page(dirty_page);
reuse:
- /*
- * Clear the pages cpupid information as the existing
- * information potentially belongs to a now completely
- * unrelated process.
- */
- if (old_page)
- page_cpupid_xchg_last(old_page, (1 << LAST_CPUPID_SHIFT) - 1);
-
flush_cache_page(vma, address, pte_pfn(orig_pte));
entry = pte_mkyoung(orig_pte);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
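The deleted block belonged to the cpupid scheme: on the write-protect reuse path it invalidated the page's last-access hint, since after reuse that history could belong to an unrelated process. (1 << LAST_CPUPID_SHIFT) - 1 is the all-ones "no hint" sentinel, i.e. the removed code amounted to (names from the reverted scheme; the sentinel macro is hypothetical):

#define CPUPID_NO_HINT	((1 << LAST_CPUPID_SHIFT) - 1)	/* hypothetical name */
if (old_page)
	page_cpupid_xchg_last(old_page, CPUPID_NO_HINT);

With cpupid tracking reverted there is no hint to clear, and the reuse path shrinks accordingly.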
@@ -3528,16 +3521,13 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
}
int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
- unsigned long addr, int page_nid,
- int *flags)
+ unsigned long addr, int page_nid)
{
get_page(page);
count_vm_numa_event(NUMA_HINT_FAULTS);
- if (page_nid == numa_node_id()) {
+ if (page_nid == numa_node_id())
count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
- *flags |= TNF_FAULT_LOCAL;
- }
return mpol_misplaced(page, vma, addr);
}
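numa_migrate_prep() loses its flags out-parameter: in the reverted scheme, locality is recorded only in the NUMA_HINT_FAULTS{,_LOCAL} vmstat counters, and the function's job reduces to taking a page reference and asking mpol_misplaced() for a target node. The caller-side contract (sketch; mirrors the do_numa_page hunk below):

page_nid = page_to_nid(page);
target_nid = numa_migrate_prep(page, vma, addr, page_nid);
pte_unmap_unlock(ptep, ptl);	/* prep took its own reference on page */
if (target_nid == -1) {
	put_page(page);		/* policy wants no migration */
	goto out;		/* 'out' label as in do_numa_page below */
}
migrated = migrate_misplaced_page(page, target_nid);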
@@ -3548,10 +3538,8 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page *page = NULL;
spinlock_t *ptl;
int page_nid = -1;
- int last_cpupid;
int target_nid;
bool migrated = false;
- int flags = 0;
/*
* The "pte" at this point cannot be used safely without
@@ -3578,26 +3566,9 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
pte_unmap_unlock(ptep, ptl);
return 0;
}
- BUG_ON(is_zero_pfn(page_to_pfn(page)));
-
- /*
- * Avoid grouping on DSO/COW pages in specific and RO pages
- * in general, RO pages shouldn't hurt as much anyway since
- * they can be in shared cache state.
- */
- if (!pte_write(pte))
- flags |= TNF_NO_GROUP;
-
- /*
- * Flag if the page is shared between multiple address spaces. This
- * is later used when determining whether to group tasks together
- */
- if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED))
- flags |= TNF_SHARED;
- last_cpupid = page_cpupid_last(page);
page_nid = page_to_nid(page);
- target_nid = numa_migrate_prep(page, vma, addr, page_nid, &flags);
+ target_nid = numa_migrate_prep(page, vma, addr, page_nid);
pte_unmap_unlock(ptep, ptl);
if (target_nid == -1) {
put_page(page);
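The deleted classification fed the NUMA task-grouping heuristics: read-only pages were kept out of grouping (TNF_NO_GROUP, since RO data can sit in shared cache state), pages mapped by several address spaces were flagged TNF_SHARED, and those hints, plus TNF_FAULT_LOCAL and TNF_MIGRATED, parameterized task_numa_fault(). After the revert the accounting narrows to (node, page count, migrated). The two reporting shapes (flag names from the removed scheme):

/* Removed scheme: classify the fault, then report with flags. */
if (!pte_write(pte))
	flags |= TNF_NO_GROUP;	/* don't group on RO/COW pages */
if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED))
	flags |= TNF_SHARED;
task_numa_fault(last_cpupid, page_nid, 1, flags);

/* Restored scheme: node, page count, and whether we migrated. */
task_numa_fault(page_nid, 1, migrated);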
@@ -3605,18 +3576,103 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
}
/* Migrate to the requested node */
- migrated = migrate_misplaced_page(page, vma, target_nid);
- if (migrated) {
+ migrated = migrate_misplaced_page(page, target_nid);
+ if (migrated)
page_nid = target_nid;
- flags |= TNF_MIGRATED;
- }
out:
if (page_nid != -1)
- task_numa_fault(last_cpupid, page_nid, 1, flags);
+ task_numa_fault(page_nid, 1, migrated);
return 0;
}
+/* NUMA hinting page fault entry point for regular pmds */
+#ifdef CONFIG_NUMA_BALANCING
+static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
+{
+ pmd_t pmd;
+ pte_t *pte, *orig_pte;
+ unsigned long _addr = addr & PMD_MASK;
+ unsigned long offset;
+ spinlock_t *ptl;
+ bool numa = false;
+
+ spin_lock(&mm->page_table_lock);
+ pmd = *pmdp;
+ if (pmd_numa(pmd)) {
+ set_pmd_at(mm, _addr, pmdp, pmd_mknonnuma(pmd));
+ numa = true;
+ }
+ spin_unlock(&mm->page_table_lock);
+
+ if (!numa)
+ return 0;
+
+ /* we're in a page fault so some vma must be in the range */
+ BUG_ON(!vma);
+ BUG_ON(vma->vm_start >= _addr + PMD_SIZE);
+ offset = max(_addr, vma->vm_start) & ~PMD_MASK;
+ VM_BUG_ON(offset >= PMD_SIZE);
+ orig_pte = pte = pte_offset_map_lock(mm, pmdp, _addr, &ptl);
+ pte += offset >> PAGE_SHIFT;
+ for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
+ pte_t pteval = *pte;
+ struct page *page;
+ int page_nid = -1;
+ int target_nid;
+ bool migrated = false;
+
+ if (!pte_present(pteval))
+ continue;
+ if (!pte_numa(pteval))
+ continue;
+ if (addr >= vma->vm_end) {
+ vma = find_vma(mm, addr);
+ /* there's a pte present so there must be a vma */
+ BUG_ON(!vma);
+ BUG_ON(addr < vma->vm_start);
+ }
+ if (pte_numa(pteval)) {
+ pteval = pte_mknonnuma(pteval);
+ set_pte_at(mm, addr, pte, pteval);
+ }
+ page = vm_normal_page(vma, addr, pteval);
+ if (unlikely(!page))
+ continue;
+ /* only check non-shared pages */
+ if (unlikely(page_mapcount(page) != 1))
+ continue;
+
+ page_nid = page_to_nid(page);
+ target_nid = numa_migrate_prep(page, vma, addr, page_nid);
+ pte_unmap_unlock(pte, ptl);
+ if (target_nid != -1) {
+ migrated = migrate_misplaced_page(page, target_nid);
+ if (migrated)
+ page_nid = target_nid;
+ } else {
+ put_page(page);
+ }
+
+ if (page_nid != -1)
+ task_numa_fault(page_nid, 1, migrated);
+
+ pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+ }
+ pte_unmap_unlock(orig_pte, ptl);
+
+ return 0;
+}
+#else
+static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
+{
+ BUG();
+ return 0;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
/*
* These routines also need to handle stuff like marking pages dirty
* and/or accessed for architectures that don't do it in hardware (most
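The reinstated do_pmd_numa_page() batch-services NUMA hinting faults for a regular (non-transparent-huge) PMD: it clears the pmd_numa bit under mm->page_table_lock, then walks every present pte_numa entry in the PMD's range, clearing each hint and raising one task_numa_fault() per page; the PTE lock is dropped across each candidate migration and retaken before the next entry. The CONFIG_NUMA_BALANCING=n stub BUG()s because pmd_numa() can never be true there. Skeleton of the walk, with the VMA-boundary handling and migration call elided (sketch; see the hunk above for the full version):

pte_t *start, *pte;
unsigned long addr;

start = pte = pte_offset_map_lock(mm, pmdp, _addr, &ptl);
for (addr = _addr; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
	pte_t pteval = *pte;

	if (!pte_present(pteval) || !pte_numa(pteval))
		continue;
	set_pte_at(mm, addr, pte, pte_mknonnuma(pteval));
	/* ... vm_normal_page(), numa_migrate_prep(), drop the lock,
	 * maybe migrate, task_numa_fault(), retake the lock ... */
}
pte_unmap_unlock(start, ptl);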
@@ -3755,8 +3811,8 @@ retry:
}
}
- /* THP should already have been handled */
- BUG_ON(pmd_numa(*pmd));
+ if (pmd_numa(*pmd))
+ return do_pmd_numa_page(mm, vma, address, pmd);
/*
* Use __pte_alloc instead of pte_alloc_map, because we can't
@@ -4270,21 +4326,3 @@ void copy_user_huge_page(struct page *dst, struct page *src,
}
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
-
-#if USE_SPLIT_PTE_PTLOCKS && BLOATED_SPINLOCKS
-bool ptlock_alloc(struct page *page)
-{
- spinlock_t *ptl;
-
- ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
- if (!ptl)
- return false;
- page->ptl = ptl;
- return true;
-}
-
-void ptlock_free(struct page *page)
-{
- kfree(page->ptl);
-}
-#endif
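The removed ptlock_alloc()/ptlock_free() pair supported split PTE locks on configurations where spinlock_t is too large to embed in struct page (BLOATED_SPINLOCKS, e.g. lockdep-enabled builds): the lock was kmalloc'ed separately and struct page kept only a pointer to it. With the split-PTE-lock rework reverted, the out-of-line fallback goes too. How consumers reached the lock in either layout, after the mm.h of that era (sketch, split-lock case only):

/* Under USE_SPLIT_PTE_PTLOCKS, the lock for a PTE page hangs off
 * its struct page; the layout depends on spinlock_t's size. */
#if BLOATED_SPINLOCKS
static inline spinlock_t *ptlock_ptr(struct page *page)
{
	return page->ptl;	/* pointer, kmalloc'ed by ptlock_alloc() */
}
#else
static inline spinlock_t *ptlock_ptr(struct page *page)
{
	return &page->ptl;	/* spinlock embedded directly */
}
#endif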