summary | refs | log | tree | commit | diff
path: root/fs/proc
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-06-01 01:10:18 (GMT)
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-06-01 01:10:18 (GMT)
commit: 08615d7d85e5aa02c05bf6c4dde87d940e7f85f6 (patch)
tree: 18906149d313d25914160aca21cedf54b3a7e818 /fs/proc
parent: 9fdadb2cbaf4b482dfd6086e8bd3d2db071a1702 (diff)
parent: 0a4dd35c67b144d8ef9432120105f1aab9293ee9 (diff)
download: linux-08615d7d85e5aa02c05bf6c4dde87d940e7f85f6.tar.xz
Merge branch 'akpm' (Andrew's patch-bomb)
Merge misc patches from Andrew Morton:

 - the "misc" tree - stuff from all over the map
 - checkpatch updates
 - fatfs
 - kmod changes
 - procfs
 - cpumask
 - UML
 - kexec
 - mqueue
 - rapidio
 - pidns
 - some checkpoint-restore feature work. Reluctantly. Most of it
   delayed a release. I'm still rather worried that we don't have a
   clear roadmap to completion for this work.

* emailed from Andrew Morton <akpm@linux-foundation.org>: (78 patches)
  kconfig: update compression algorithm info
  c/r: prctl: add ability to set new mm_struct::exe_file
  c/r: prctl: extend PR_SET_MM to set up more mm_struct entries
  c/r: procfs: add arg_start/end, env_start/end and exit_code members to /proc/$pid/stat
  syscalls, x86: add __NR_kcmp syscall
  fs, proc: introduce /proc/<pid>/task/<tid>/children entry
  sysctl: make kernel.ns_last_pid control dependent on CHECKPOINT_RESTORE
  aio/vfs: cleanup of rw_copy_check_uvector() and compat_rw_copy_check_uvector()
  eventfd: change int to __u64 in eventfd_signal()
  fs/nls: add Apple NLS
  pidns: make killed children autoreap
  pidns: use task_active_pid_ns in do_notify_parent
  rapidio/tsi721: add DMA engine support
  rapidio: add DMA engine support for RIO data transfers
  ipc/mqueue: add rbtree node caching support
  tools/selftests: add mq_perf_tests
  ipc/mqueue: strengthen checks on mqueue creation
  ipc/mqueue: correct mq_attr_ok test
  ipc/mqueue: improve performance of send/recv
  selftests: add mq_open_tests
  ...
Diffstat (limited to 'fs/proc')
-rw-r--r--  fs/proc/array.c       147
-rw-r--r--  fs/proc/base.c         81
-rw-r--r--  fs/proc/internal.h      3
-rw-r--r--  fs/proc/task_mmu.c     82
-rw-r--r--  fs/proc/task_nommu.c    2
5 files changed, 238 insertions, 77 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c
index dc4c5a7..c1c207c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -370,7 +370,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task, int whole)
{
unsigned long vsize, eip, esp, wchan = ~0UL;
- long priority, nice;
+ int priority, nice;
int tty_pgrp = -1, tty_nr = 0;
sigset_t sigign, sigcatch;
char state;
@@ -492,7 +492,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
seq_put_decimal_ull(m, ' ', 0);
seq_put_decimal_ull(m, ' ', start_time);
seq_put_decimal_ull(m, ' ', vsize);
- seq_put_decimal_ll(m, ' ', mm ? get_mm_rss(mm) : 0);
+ seq_put_decimal_ull(m, ' ', mm ? get_mm_rss(mm) : 0);
seq_put_decimal_ull(m, ' ', rsslim);
seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0);
seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0);
@@ -517,9 +517,23 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task));
seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime));
seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime));
- seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->start_data : 0);
- seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->end_data : 0);
- seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->start_brk : 0);
+
+ if (mm && permitted) {
+ seq_put_decimal_ull(m, ' ', mm->start_data);
+ seq_put_decimal_ull(m, ' ', mm->end_data);
+ seq_put_decimal_ull(m, ' ', mm->start_brk);
+ seq_put_decimal_ull(m, ' ', mm->arg_start);
+ seq_put_decimal_ull(m, ' ', mm->arg_end);
+ seq_put_decimal_ull(m, ' ', mm->env_start);
+ seq_put_decimal_ull(m, ' ', mm->env_end);
+ } else
+ seq_printf(m, " 0 0 0 0 0 0 0");
+
+ if (permitted)
+ seq_put_decimal_ll(m, ' ', task->exit_code);
+ else
+ seq_put_decimal_ll(m, ' ', 0);
+
seq_putc(m, '\n');
if (mm)
mmput(mm);
@@ -565,3 +579,126 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
return 0;
}
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static struct pid *
+get_children_pid(struct inode *inode, struct pid *pid_prev, loff_t pos)
+{
+ struct task_struct *start, *task;
+ struct pid *pid = NULL;
+
+ read_lock(&tasklist_lock);
+
+ start = pid_task(proc_pid(inode), PIDTYPE_PID);
+ if (!start)
+ goto out;
+
+ /*
+ * Let's try to continue searching first, this gives
+ * us significant speedup on children-rich processes.
+ */
+ if (pid_prev) {
+ task = pid_task(pid_prev, PIDTYPE_PID);
+ if (task && task->real_parent == start &&
+ !(list_empty(&task->sibling))) {
+ if (list_is_last(&task->sibling, &start->children))
+ goto out;
+ task = list_first_entry(&task->sibling,
+ struct task_struct, sibling);
+ pid = get_pid(task_pid(task));
+ goto out;
+ }
+ }
+
+ /*
+ * Slow search case.
+ *
+ * We might miss some children here if children
+ * are exited while we were not holding the lock,
+ * but it was never promised to be accurate that
+ * much.
+ *
+ * "Just suppose that the parent sleeps, but N children
+ * exit after we printed their tids. Now the slow paths
+ * skips N extra children, we miss N tasks." (c)
+ *
+ * So one needs to stop or freeze the leader and all
+ * its children to get a precise result.
+ */
+ list_for_each_entry(task, &start->children, sibling) {
+ if (pos-- == 0) {
+ pid = get_pid(task_pid(task));
+ break;
+ }
+ }
+
+out:
+ read_unlock(&tasklist_lock);
+ return pid;
+}
+
+static int children_seq_show(struct seq_file *seq, void *v)
+{
+ struct inode *inode = seq->private;
+ pid_t pid;
+
+ pid = pid_nr_ns(v, inode->i_sb->s_fs_info);
+ return seq_printf(seq, "%d ", pid);
+}
+
+static void *children_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return get_children_pid(seq->private, NULL, *pos);
+}
+
+static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct pid *pid;
+
+ pid = get_children_pid(seq->private, v, *pos + 1);
+ put_pid(v);
+
+ ++*pos;
+ return pid;
+}
+
+static void children_seq_stop(struct seq_file *seq, void *v)
+{
+ put_pid(v);
+}
+
+static const struct seq_operations children_seq_ops = {
+ .start = children_seq_start,
+ .next = children_seq_next,
+ .stop = children_seq_stop,
+ .show = children_seq_show,
+};
+
+static int children_seq_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *m;
+ int ret;
+
+ ret = seq_open(file, &children_seq_ops);
+ if (ret)
+ return ret;
+
+ m = file->private_data;
+ m->private = inode;
+
+ return ret;
+}
+
+int children_seq_release(struct inode *inode, struct file *file)
+{
+ seq_release(inode, file);
+ return 0;
+}
+
+const struct file_operations proc_tid_children_operations = {
+ .open = children_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = children_seq_release,
+};
+#endif /* CONFIG_CHECKPOINT_RESTORE */
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d7d7118..616f41a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -199,11 +199,6 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
return result;
}
-struct mm_struct *mm_for_maps(struct task_struct *task)
-{
- return mm_access(task, PTRACE_MODE_READ);
-}
-
static int proc_pid_cmdline(struct task_struct *task, char * buffer)
{
int res = 0;
@@ -243,7 +238,7 @@ out:
static int proc_pid_auxv(struct task_struct *task, char *buffer)
{
- struct mm_struct *mm = mm_for_maps(task);
+ struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ);
int res = PTR_ERR(mm);
if (mm && !IS_ERR(mm)) {
unsigned int nwords = 0;
@@ -679,7 +674,7 @@ static const struct file_operations proc_single_file_operations = {
.release = single_release,
};
-static int mem_open(struct inode* inode, struct file* file)
+static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
{
struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
struct mm_struct *mm;
@@ -687,7 +682,7 @@ static int mem_open(struct inode* inode, struct file* file)
if (!task)
return -ESRCH;
- mm = mm_access(task, PTRACE_MODE_ATTACH);
+ mm = mm_access(task, mode);
put_task_struct(task);
if (IS_ERR(mm))
@@ -707,6 +702,11 @@ static int mem_open(struct inode* inode, struct file* file)
return 0;
}
+static int mem_open(struct inode *inode, struct file *file)
+{
+ return __mem_open(inode, file, PTRACE_MODE_ATTACH);
+}
+
static ssize_t mem_rw(struct file *file, char __user *buf,
size_t count, loff_t *ppos, int write)
{
@@ -803,30 +803,29 @@ static const struct file_operations proc_mem_operations = {
.release = mem_release,
};
+static int environ_open(struct inode *inode, struct file *file)
+{
+ return __mem_open(inode, file, PTRACE_MODE_READ);
+}
+
static ssize_t environ_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
- struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
char *page;
unsigned long src = *ppos;
- int ret = -ESRCH;
- struct mm_struct *mm;
+ int ret = 0;
+ struct mm_struct *mm = file->private_data;
- if (!task)
- goto out_no_task;
+ if (!mm)
+ return 0;
- ret = -ENOMEM;
page = (char *)__get_free_page(GFP_TEMPORARY);
if (!page)
- goto out;
-
-
- mm = mm_for_maps(task);
- ret = PTR_ERR(mm);
- if (!mm || IS_ERR(mm))
- goto out_free;
+ return -ENOMEM;
ret = 0;
+ if (!atomic_inc_not_zero(&mm->mm_users))
+ goto free;
while (count > 0) {
int this_len, retval, max_len;
@@ -838,7 +837,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
this_len = (this_len > max_len) ? max_len : this_len;
- retval = access_process_vm(task, (mm->env_start + src),
+ retval = access_remote_vm(mm, (mm->env_start + src),
page, this_len, 0);
if (retval <= 0) {
@@ -857,19 +856,18 @@ static ssize_t environ_read(struct file *file, char __user *buf,
count -= retval;
}
*ppos = src;
-
mmput(mm);
-out_free:
+
+free:
free_page((unsigned long) page);
-out:
- put_task_struct(task);
-out_no_task:
return ret;
}
static const struct file_operations proc_environ_operations = {
+ .open = environ_open,
.read = environ_read,
.llseek = generic_file_llseek,
+ .release = mem_release,
};
static ssize_t oom_adjust_read(struct file *file, char __user *buf,
@@ -1850,7 +1848,7 @@ static const struct dentry_operations tid_fd_dentry_operations =
static struct dentry *proc_fd_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
- unsigned fd = *(const unsigned *)ptr;
+ unsigned fd = (unsigned long)ptr;
struct inode *inode;
struct proc_inode *ei;
struct dentry *error = ERR_PTR(-ENOENT);
@@ -1887,7 +1885,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir,
if (fd == ~0U)
goto out;
- result = instantiate(dir, dentry, task, &fd);
+ result = instantiate(dir, dentry, task, (void *)(unsigned long)fd);
out:
put_task_struct(task);
out_no_task:
@@ -1930,21 +1928,22 @@ static int proc_readfd_common(struct file * filp, void * dirent,
fd++, filp->f_pos++) {
char name[PROC_NUMBUF];
int len;
+ int rv;
if (!fcheck_files(files, fd))
continue;
rcu_read_unlock();
len = snprintf(name, sizeof(name), "%d", fd);
- if (proc_fill_cache(filp, dirent, filldir,
- name, len, instantiate,
- p, &fd) < 0) {
- rcu_read_lock();
- break;
- }
+ rv = proc_fill_cache(filp, dirent, filldir,
+ name, len, instantiate, p,
+ (void *)(unsigned long)fd);
+ if (rv < 0)
+ goto out_fd_loop;
rcu_read_lock();
}
rcu_read_unlock();
+out_fd_loop:
put_files_struct(files);
}
out:
@@ -2024,11 +2023,8 @@ static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd)
if (!task)
goto out_notask;
- if (!ptrace_may_access(task, PTRACE_MODE_READ))
- goto out;
-
- mm = get_task_mm(task);
- if (!mm)
+ mm = mm_access(task, PTRACE_MODE_READ);
+ if (IS_ERR_OR_NULL(mm))
goto out;
if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
@@ -2357,7 +2353,7 @@ static const struct inode_operations proc_fd_inode_operations = {
static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
- unsigned fd = *(unsigned *)ptr;
+ unsigned fd = (unsigned long)ptr;
struct inode *inode;
struct proc_inode *ei;
struct dentry *error = ERR_PTR(-ENOENT);
@@ -3404,6 +3400,9 @@ static const struct pid_entry tid_base_stuff[] = {
ONE("stat", S_IRUGO, proc_tid_stat),
ONE("statm", S_IRUGO, proc_pid_statm),
REG("maps", S_IRUGO, proc_tid_maps_operations),
+#ifdef CONFIG_CHECKPOINT_RESTORE
+ REG("children", S_IRUGO, proc_tid_children_operations),
+#endif
#ifdef CONFIG_NUMA
REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations),
#endif
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 5f79bb8..eca4aca 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -31,8 +31,6 @@ struct vmalloc_info {
unsigned long largest_chunk;
};
-extern struct mm_struct *mm_for_maps(struct task_struct *);
-
#ifdef CONFIG_MMU
#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START)
extern void get_vmalloc_info(struct vmalloc_info *vmi);
@@ -56,6 +54,7 @@ extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task);
extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
+extern const struct file_operations proc_tid_children_operations;
extern const struct file_operations proc_pid_maps_operations;
extern const struct file_operations proc_tid_maps_operations;
extern const struct file_operations proc_pid_numa_maps_operations;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7faaf2a..4540b8f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -125,7 +125,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
if (!priv->task)
return ERR_PTR(-ESRCH);
- mm = mm_for_maps(priv->task);
+ mm = mm_access(priv->task, PTRACE_MODE_READ);
if (!mm || IS_ERR(mm))
return mm;
down_read(&mm->mmap_sem);
@@ -393,6 +393,7 @@ struct mem_size_stats {
unsigned long anonymous;
unsigned long anonymous_thp;
unsigned long swap;
+ unsigned long nonlinear;
u64 pss;
};
@@ -402,24 +403,33 @@ static void smaps_pte_entry(pte_t ptent, unsigned long addr,
{
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = mss->vma;
- struct page *page;
+ pgoff_t pgoff = linear_page_index(vma, addr);
+ struct page *page = NULL;
int mapcount;
- if (is_swap_pte(ptent)) {
- mss->swap += ptent_size;
- return;
+ if (pte_present(ptent)) {
+ page = vm_normal_page(vma, addr, ptent);
+ } else if (is_swap_pte(ptent)) {
+ swp_entry_t swpent = pte_to_swp_entry(ptent);
+
+ if (!non_swap_entry(swpent))
+ mss->swap += ptent_size;
+ else if (is_migration_entry(swpent))
+ page = migration_entry_to_page(swpent);
+ } else if (pte_file(ptent)) {
+ if (pte_to_pgoff(ptent) != pgoff)
+ mss->nonlinear += ptent_size;
}
- if (!pte_present(ptent))
- return;
-
- page = vm_normal_page(vma, addr, ptent);
if (!page)
return;
if (PageAnon(page))
mss->anonymous += ptent_size;
+ if (page->index != pgoff)
+ mss->nonlinear += ptent_size;
+
mss->resident += ptent_size;
/* Accumulate the size in pages that have been accessed. */
if (pte_young(ptent) || PageReferenced(page))
@@ -521,6 +531,10 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
(vma->vm_flags & VM_LOCKED) ?
(unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
+ if (vma->vm_flags & VM_NONLINEAR)
+ seq_printf(m, "Nonlinear: %8lu kB\n",
+ mss.nonlinear >> 10);
+
if (m->count < m->size) /* vma is copied successfully */
m->version = (vma != get_gate_vma(task->mm))
? vma->vm_start : 0;
@@ -700,6 +714,7 @@ struct pagemapread {
#define PM_PRESENT PM_STATUS(4LL)
#define PM_SWAP PM_STATUS(2LL)
+#define PM_FILE PM_STATUS(1LL)
#define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT)
#define PM_END_OF_BUFFER 1
@@ -733,22 +748,33 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
return err;
}
-static u64 swap_pte_to_pagemap_entry(pte_t pte)
+static void pte_to_pagemap_entry(pagemap_entry_t *pme,
+ struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
- swp_entry_t e = pte_to_swp_entry(pte);
- return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
-}
-
-static void pte_to_pagemap_entry(pagemap_entry_t *pme, pte_t pte)
-{
- if (is_swap_pte(pte))
- *pme = make_pme(PM_PFRAME(swap_pte_to_pagemap_entry(pte))
- | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP);
- else if (pte_present(pte))
- *pme = make_pme(PM_PFRAME(pte_pfn(pte))
- | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
- else
+ u64 frame, flags;
+ struct page *page = NULL;
+
+ if (pte_present(pte)) {
+ frame = pte_pfn(pte);
+ flags = PM_PRESENT;
+ page = vm_normal_page(vma, addr, pte);
+ } else if (is_swap_pte(pte)) {
+ swp_entry_t entry = pte_to_swp_entry(pte);
+
+ frame = swp_type(entry) |
+ (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
+ flags = PM_SWAP;
+ if (is_migration_entry(entry))
+ page = migration_entry_to_page(entry);
+ } else {
*pme = make_pme(PM_NOT_PRESENT);
+ return;
+ }
+
+ if (page && !PageAnon(page))
+ flags |= PM_FILE;
+
+ *pme = make_pme(PM_PFRAME(frame) | PM_PSHIFT(PAGE_SHIFT) | flags);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -815,7 +841,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
if (vma && (vma->vm_start <= addr) &&
!is_vm_hugetlb_page(vma)) {
pte = pte_offset_map(pmd, addr);
- pte_to_pagemap_entry(&pme, *pte);
+ pte_to_pagemap_entry(&pme, vma, addr, *pte);
/* unmap before userspace copy */
pte_unmap(pte);
}
@@ -869,11 +895,11 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
* For each page in the address space, this file contains one 64-bit entry
* consisting of the following:
*
- * Bits 0-55 page frame number (PFN) if present
+ * Bits 0-54 page frame number (PFN) if present
* Bits 0-4 swap type if swapped
- * Bits 5-55 swap offset if swapped
+ * Bits 5-54 swap offset if swapped
* Bits 55-60 page shift (page size = 1<<page shift)
- * Bit 61 reserved for future use
+ * Bit 61 page is file-page or shared-anon
* Bit 62 page swapped
* Bit 63 page present
*
@@ -919,7 +945,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
if (!pm.buffer)
goto out_task;
- mm = mm_for_maps(task);
+ mm = mm_access(task, PTRACE_MODE_READ);
ret = PTR_ERR(mm);
if (!mm || IS_ERR(mm))
goto out_free;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 74fe164..1ccfa53 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -223,7 +223,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
if (!priv->task)
return ERR_PTR(-ESRCH);
- mm = mm_for_maps(priv->task);
+ mm = mm_access(priv->task, PTRACE_MODE_READ);
if (!mm || IS_ERR(mm)) {
put_task_struct(priv->task);
priv->task = NULL;