summaryrefslogtreecommitdiff
path: root/mm/huge_memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--mm/huge_memory.c66
1 files changed, 49 insertions, 17 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index cca80d9..389973f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -884,6 +884,10 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
ret = 0;
goto out_unlock;
}
+
+ /* mmap_sem prevents this happening but warn if that changes */
+ WARN_ON(pmd_trans_migrating(pmd));
+
if (unlikely(pmd_trans_splitting(pmd))) {
/* split huge page running from under us */
spin_unlock(&src_mm->page_table_lock);
@@ -1150,14 +1154,16 @@ alloc:
new_page = NULL;
if (unlikely(!new_page)) {
- if (is_huge_zero_pmd(orig_pmd)) {
+ if (!page) {
ret = do_huge_pmd_wp_zero_page_fallback(mm, vma,
address, pmd, orig_pmd, haddr);
} else {
ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
pmd, orig_pmd, page, haddr);
- if (ret & VM_FAULT_OOM)
+ if (ret & VM_FAULT_OOM) {
split_huge_page(page);
+ ret |= VM_FAULT_FALLBACK;
+ }
put_page(page);
}
count_vm_event(THP_FAULT_FALLBACK);
@@ -1169,15 +1175,16 @@ alloc:
if (page) {
split_huge_page(page);
put_page(page);
- }
+ } else
+ split_huge_page_pmd(vma, address, pmd);
+ ret |= VM_FAULT_FALLBACK;
count_vm_event(THP_FAULT_FALLBACK);
- ret |= VM_FAULT_OOM;
goto out;
}
count_vm_event(THP_FAULT_ALLOC);
- if (is_huge_zero_pmd(orig_pmd))
+ if (!page)
clear_huge_page(new_page, haddr, HPAGE_PMD_NR);
else
copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
@@ -1203,7 +1210,7 @@ alloc:
page_add_new_anon_rmap(new_page, vma, haddr);
set_pmd_at(mm, haddr, pmd, entry);
update_mmu_cache_pmd(vma, address, pmd);
- if (is_huge_zero_pmd(orig_pmd)) {
+ if (!page) {
add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
put_huge_zero_page();
} else {
@@ -1240,6 +1247,10 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
return ERR_PTR(-EFAULT);
+ /* Full NUMA hinting faults to serialise migration in fault paths */
+ if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+ goto out;
+
page = pmd_page(*pmd);
VM_BUG_ON(!PageHead(page));
if (flags & FOLL_TOUCH) {
@@ -1290,6 +1301,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (unlikely(!pmd_same(pmd, *pmdp)))
goto out_unlock;
+ /*
+ * If there are potential migrations, wait for completion and retry
+ * without disrupting NUMA hinting information. Do not relock and
+ * check_same as the page may no longer be mapped.
+ */
+ if (unlikely(pmd_trans_migrating(*pmdp))) {
+ spin_unlock(&mm->page_table_lock);
+ wait_migrate_huge_page(vma->anon_vma, pmdp);
+ goto out;
+ }
+
page = pmd_page(pmd);
page_nid = page_to_nid(page);
count_vm_numa_event(NUMA_HINT_FAULTS);
@@ -1306,23 +1328,22 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
/* If the page was locked, there are no parallel migrations */
if (page_locked)
goto clear_pmdnuma;
+ }
- /*
- * Otherwise wait for potential migrations and retry. We do
- * relock and check_same as the page may no longer be mapped.
- * As the fault is being retried, do not account for it.
- */
+ /* Migration could have started since the pmd_trans_migrating check */
+ if (!page_locked) {
spin_unlock(&mm->page_table_lock);
wait_on_page_locked(page);
page_nid = -1;
goto out;
}
- /* Page is misplaced, serialise migrations and parallel THP splits */
+ /*
+ * Page is misplaced. Page lock serialises migrations. Acquire anon_vma
+ * to serialises splits
+ */
get_page(page);
spin_unlock(&mm->page_table_lock);
- if (!page_locked)
- lock_page(page);
anon_vma = page_lock_anon_vma_read(page);
/* Confirm the PTE did not while locked */
@@ -1334,6 +1355,13 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto out_unlock;
}
+ /* Bail if we fail to protect against THP splits for any reason */
+ if (unlikely(!anon_vma)) {
+ put_page(page);
+ page_nid = -1;
+ goto clear_pmdnuma;
+ }
+
/*
* Migrate the THP to the requested node, returns with page unlocked
* and pmd_numa cleared.
@@ -1466,20 +1494,24 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
if (__pmd_trans_huge_lock(pmd, vma) == 1) {
pmd_t entry;
- entry = pmdp_get_and_clear(mm, addr, pmd);
if (!prot_numa) {
+ entry = pmdp_get_and_clear(mm, addr, pmd);
+ if (pmd_numa(entry))
+ entry = pmd_mknonnuma(entry);
entry = pmd_modify(entry, newprot);
BUG_ON(pmd_write(entry));
+ set_pmd_at(mm, addr, pmd, entry);
} else {
struct page *page = pmd_page(*pmd);
+ entry = *pmd;
/* only check non-shared pages */
if (page_mapcount(page) == 1 &&
!pmd_numa(*pmd)) {
entry = pmd_mknuma(entry);
+ set_pmd_at(mm, addr, pmd, entry);
}
}
- set_pmd_at(mm, addr, pmd, entry);
spin_unlock(&vma->vm_mm->page_table_lock);
ret = 1;
}
@@ -1865,7 +1897,7 @@ out:
return ret;
}
-#define VM_NO_THP (VM_SPECIAL|VM_MIXEDMAP|VM_HUGETLB|VM_SHARED|VM_MAYSHARE)
+#define VM_NO_THP (VM_SPECIAL | VM_HUGETLB | VM_SHARED | VM_MAYSHARE)
int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice)