From acc3c8d15eed6b68c7edf5bfaea884753aaa8e85 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 14 Apr 2015 15:44:45 -0700 Subject: mm: move mm_populate()-related code to mm/gup.c It's odd that we have populate_vma_page_range() and __mm_populate() in mm/mlock.c. It's implementation of generic memory population and mlocking is one of possible side effect, if VM_LOCKED is set. __get_user_pages() is core of the implementation. Let's move the code into mm/gup.c. Signed-off-by: Kirill A. Shutemov Acked-by: Linus Torvalds Acked-by: David Rientjes Cc: Michel Lespinasse Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/gup.c b/mm/gup.c index 1b114ba..ca7b607 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -819,6 +819,124 @@ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, EXPORT_SYMBOL(get_user_pages); /** + * populate_vma_page_range() - populate a range of pages in the vma. + * @vma: target vma + * @start: start address + * @end: end address + * @nonblocking: + * + * This takes care of mlocking the pages too if VM_LOCKED is set. + * + * return 0 on success, negative error code on error. + * + * vma->vm_mm->mmap_sem must be held. + * + * If @nonblocking is NULL, it may be held for read or write and will + * be unperturbed. + * + * If @nonblocking is non-NULL, it must held for read only and may be + * released. If it's released, *@nonblocking will be set to 0. + */ +long populate_vma_page_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end, int *nonblocking) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long nr_pages = (end - start) / PAGE_SIZE; + int gup_flags; + + VM_BUG_ON(start & ~PAGE_MASK); + VM_BUG_ON(end & ~PAGE_MASK); + VM_BUG_ON_VMA(start < vma->vm_start, vma); + VM_BUG_ON_VMA(end > vma->vm_end, vma); + VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm); + + gup_flags = FOLL_TOUCH | FOLL_POPULATE; + /* + * We want to touch writable mappings with a write fault in order + * to break COW, except for shared mappings because these don't COW + * and we would not want to dirty them for nothing. + */ + if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) + gup_flags |= FOLL_WRITE; + + /* + * We want mlock to succeed for regions that have any permissions + * other than PROT_NONE. + */ + if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) + gup_flags |= FOLL_FORCE; + + /* + * We made sure addr is within a VMA, so the following will + * not result in a stack expansion that recurses back here. + */ + return __get_user_pages(current, mm, start, nr_pages, gup_flags, + NULL, NULL, nonblocking); +} + +/* + * __mm_populate - populate and/or mlock pages within a range of address space. + * + * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap + * flags. VMAs must be already marked with the desired vm_flags, and + * mmap_sem must not be held. + */ +int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) +{ + struct mm_struct *mm = current->mm; + unsigned long end, nstart, nend; + struct vm_area_struct *vma = NULL; + int locked = 0; + long ret = 0; + + VM_BUG_ON(start & ~PAGE_MASK); + VM_BUG_ON(len != PAGE_ALIGN(len)); + end = start + len; + + for (nstart = start; nstart < end; nstart = nend) { + /* + * We want to fault in pages for [nstart; end) address range. + * Find first corresponding VMA. + */ + if (!locked) { + locked = 1; + down_read(&mm->mmap_sem); + vma = find_vma(mm, nstart); + } else if (nstart >= vma->vm_end) + vma = vma->vm_next; + if (!vma || vma->vm_start >= end) + break; + /* + * Set [nstart; nend) to intersection of desired address + * range with the first VMA. Also, skip undesirable VMA types. + */ + nend = min(end, vma->vm_end); + if (vma->vm_flags & (VM_IO | VM_PFNMAP)) + continue; + if (nstart < vma->vm_start) + nstart = vma->vm_start; + /* + * Now fault in a range of pages. populate_vma_page_range() + * double checks the vma flags, so that it won't mlock pages + * if the vma was already munlocked. + */ + ret = populate_vma_page_range(vma, nstart, nend, &locked); + if (ret < 0) { + if (ignore_errors) { + ret = 0; + continue; /* continue at next VMA */ + } + break; + } + nend = nstart + ret * PAGE_SIZE; + ret = 0; + } + if (locked) + up_read(&mm->mmap_sem); + return ret; /* 0 or negative error code */ +} + +/** * get_dump_page() - pin user page in memory while writing it to core dump * @addr: user address * diff --git a/mm/mlock.c b/mm/mlock.c index 0214263..6fd2cf1 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -205,62 +205,6 @@ out: return nr_pages - 1; } -/** - * populate_vma_page_range() - populate a range of pages in the vma. - * @vma: target vma - * @start: start address - * @end: end address - * @nonblocking: - * - * This takes care of mlocking the pages too if VM_LOCKED is set. - * - * return 0 on success, negative error code on error. - * - * vma->vm_mm->mmap_sem must be held. - * - * If @nonblocking is NULL, it may be held for read or write and will - * be unperturbed. - * - * If @nonblocking is non-NULL, it must held for read only and may be - * released. If it's released, *@nonblocking will be set to 0. - */ -long populate_vma_page_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end, int *nonblocking) -{ - struct mm_struct *mm = vma->vm_mm; - unsigned long nr_pages = (end - start) / PAGE_SIZE; - int gup_flags; - - VM_BUG_ON(start & ~PAGE_MASK); - VM_BUG_ON(end & ~PAGE_MASK); - VM_BUG_ON_VMA(start < vma->vm_start, vma); - VM_BUG_ON_VMA(end > vma->vm_end, vma); - VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm); - - gup_flags = FOLL_TOUCH | FOLL_POPULATE; - /* - * We want to touch writable mappings with a write fault in order - * to break COW, except for shared mappings because these don't COW - * and we would not want to dirty them for nothing. - */ - if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) - gup_flags |= FOLL_WRITE; - - /* - * We want mlock to succeed for regions that have any permissions - * other than PROT_NONE. - */ - if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) - gup_flags |= FOLL_FORCE; - - /* - * We made sure addr is within a VMA, so the following will - * not result in a stack expansion that recurses back here. - */ - return __get_user_pages(current, mm, start, nr_pages, gup_flags, - NULL, NULL, nonblocking); -} - /* * convert get_user_pages() return value to posix mlock() error */ @@ -660,68 +604,6 @@ static int do_mlock(unsigned long start, size_t len, int on) return error; } -/* - * __mm_populate - populate and/or mlock pages within a range of address space. - * - * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap - * flags. VMAs must be already marked with the desired vm_flags, and - * mmap_sem must not be held. - */ -int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) -{ - struct mm_struct *mm = current->mm; - unsigned long end, nstart, nend; - struct vm_area_struct *vma = NULL; - int locked = 0; - long ret = 0; - - VM_BUG_ON(start & ~PAGE_MASK); - VM_BUG_ON(len != PAGE_ALIGN(len)); - end = start + len; - - for (nstart = start; nstart < end; nstart = nend) { - /* - * We want to fault in pages for [nstart; end) address range. - * Find first corresponding VMA. - */ - if (!locked) { - locked = 1; - down_read(&mm->mmap_sem); - vma = find_vma(mm, nstart); - } else if (nstart >= vma->vm_end) - vma = vma->vm_next; - if (!vma || vma->vm_start >= end) - break; - /* - * Set [nstart; nend) to intersection of desired address - * range with the first VMA. Also, skip undesirable VMA types. - */ - nend = min(end, vma->vm_end); - if (vma->vm_flags & (VM_IO | VM_PFNMAP)) - continue; - if (nstart < vma->vm_start) - nstart = vma->vm_start; - /* - * Now fault in a range of pages. populate_vma_page_range() - * double checks the vma flags, so that it won't mlock pages - * if the vma was already munlocked. - */ - ret = populate_vma_page_range(vma, nstart, nend, &locked); - if (ret < 0) { - if (ignore_errors) { - ret = 0; - continue; /* continue at next VMA */ - } - break; - } - nend = nstart + ret * PAGE_SIZE; - ret = 0; - } - if (locked) - up_read(&mm->mmap_sem); - return ret; /* 0 or negative error code */ -} - SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) { unsigned long locked; -- cgit v0.10.2