diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 115 | ||||
-rw-r--r-- | mm/memory.c | 3 | ||||
-rw-r--r-- | mm/mempolicy.c | 45 | ||||
-rw-r--r-- | mm/migrate.c | 4 | ||||
-rw-r--r-- | mm/mmap.c | 33 | ||||
-rw-r--r-- | mm/oom_kill.c | 4 | ||||
-rw-r--r-- | mm/page_alloc.c | 6 | ||||
-rw-r--r-- | mm/shmem.c | 22 |
8 files changed, 129 insertions, 103 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7685d4a..f342778 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3873,14 +3873,21 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) return val << PAGE_SHIFT; } -static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) +static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft, + struct file *file, char __user *buf, + size_t nbytes, loff_t *ppos) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); + char str[64]; u64 val; - int type, name; + int type, name, len; type = MEMFILE_TYPE(cft->private); name = MEMFILE_ATTR(cft->private); + + if (!do_swap_account && type == _MEMSWAP) + return -EOPNOTSUPP; + switch (type) { case _MEM: if (name == RES_USAGE) @@ -3897,7 +3904,9 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) default: BUG(); } - return val; + + len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val); + return simple_read_from_buffer(buf, nbytes, ppos, str, len); } /* * The user of this function is... @@ -3913,6 +3922,10 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, type = MEMFILE_TYPE(cft->private); name = MEMFILE_ATTR(cft->private); + + if (!do_swap_account && type == _MEMSWAP) + return -EOPNOTSUPP; + switch (name) { case RES_LIMIT: if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */ @@ -3978,12 +3991,15 @@ out: static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) { - struct mem_cgroup *memcg; + struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); int type, name; - memcg = mem_cgroup_from_cont(cont); type = MEMFILE_TYPE(event); name = MEMFILE_ATTR(event); + + if (!do_swap_account && type == _MEMSWAP) + return -EOPNOTSUPP; + switch (name) { case RES_MAX_USAGE: if (type == _MEM) @@ -4624,29 +4640,22 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file) #endif /* CONFIG_NUMA */ #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM -static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss) +static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { - /* - * Part of this would be better living in a separate allocation - * function, leaving us with just the cgroup tree population work. - * We, however, depend on state such as network's proto_list that - * is only initialized after cgroup creation. I found the less - * cumbersome way to deal with it to defer it all to populate time - */ - return mem_cgroup_sockets_init(cont, ss); + return mem_cgroup_sockets_init(memcg, ss); }; -static void kmem_cgroup_destroy(struct cgroup *cont) +static void kmem_cgroup_destroy(struct mem_cgroup *memcg) { - mem_cgroup_sockets_destroy(cont); + mem_cgroup_sockets_destroy(memcg); } #else -static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss) +static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { return 0; } -static void kmem_cgroup_destroy(struct cgroup *cont) +static void kmem_cgroup_destroy(struct mem_cgroup *memcg) { } #endif @@ -4655,7 +4664,7 @@ static struct cftype mem_cgroup_files[] = { { .name = "usage_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), - .read_u64 = mem_cgroup_read, + .read = mem_cgroup_read, .register_event = mem_cgroup_usage_register_event, .unregister_event = mem_cgroup_usage_unregister_event, }, @@ -4663,25 +4672,25 @@ static struct cftype mem_cgroup_files[] = { .name = "max_usage_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), .trigger = mem_cgroup_reset, - .read_u64 = mem_cgroup_read, + .read = mem_cgroup_read, }, { .name = "limit_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), .write_string = mem_cgroup_write, - .read_u64 = mem_cgroup_read, + .read = mem_cgroup_read, }, { .name = "soft_limit_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), .write_string = mem_cgroup_write, - .read_u64 = mem_cgroup_read, + .read = mem_cgroup_read, }, { .name = "failcnt", .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), .trigger = mem_cgroup_reset, - .read_u64 = mem_cgroup_read, + .read = mem_cgroup_read, }, { .name = "stat", @@ -4721,14 +4730,11 @@ static struct cftype mem_cgroup_files[] = { .mode = S_IRUGO, }, #endif -}; - #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP -static struct cftype memsw_cgroup_files[] = { { .name = "memsw.usage_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), - .read_u64 = mem_cgroup_read, + .read = mem_cgroup_read, .register_event = mem_cgroup_usage_register_event, .unregister_event = mem_cgroup_usage_unregister_event, }, @@ -4736,35 +4742,23 @@ static struct cftype memsw_cgroup_files[] = { .name = "memsw.max_usage_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), .trigger = mem_cgroup_reset, - .read_u64 = mem_cgroup_read, + .read = mem_cgroup_read, }, { .name = "memsw.limit_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), .write_string = mem_cgroup_write, - .read_u64 = mem_cgroup_read, + .read = mem_cgroup_read, }, { .name = "memsw.failcnt", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), .trigger = mem_cgroup_reset, - .read_u64 = mem_cgroup_read, + .read = mem_cgroup_read, }, -}; - -static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss) -{ - if (!do_swap_account) - return 0; - return cgroup_add_files(cont, ss, memsw_cgroup_files, - ARRAY_SIZE(memsw_cgroup_files)); -}; -#else -static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss) -{ - return 0; -} #endif + { }, /* terminate */ +}; static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) { @@ -5016,6 +5010,17 @@ mem_cgroup_create(struct cgroup *cont) memcg->move_charge_at_immigrate = 0; mutex_init(&memcg->thresholds_lock); spin_lock_init(&memcg->move_lock); + + error = memcg_init_kmem(memcg, &mem_cgroup_subsys); + if (error) { + /* + * We call put now because our (and parent's) refcnts + * are already in place. mem_cgroup_put() will internally + * call __mem_cgroup_free, so return directly + */ + mem_cgroup_put(memcg); + return ERR_PTR(error); + } return &memcg->css; free_out: __mem_cgroup_free(memcg); @@ -5033,28 +5038,11 @@ static void mem_cgroup_destroy(struct cgroup *cont) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); - kmem_cgroup_destroy(cont); + kmem_cgroup_destroy(memcg); mem_cgroup_put(memcg); } -static int mem_cgroup_populate(struct cgroup_subsys *ss, - struct cgroup *cont) -{ - int ret; - - ret = cgroup_add_files(cont, ss, mem_cgroup_files, - ARRAY_SIZE(mem_cgroup_files)); - - if (!ret) - ret = register_memsw_files(cont, ss); - - if (!ret) - ret = register_kmem_files(cont, ss); - - return ret; -} - #ifdef CONFIG_MMU /* Handlers for move charge at task migration. */ #define PRECHARGE_COUNT_AT_ONCE 256 @@ -5638,12 +5626,13 @@ struct cgroup_subsys mem_cgroup_subsys = { .create = mem_cgroup_create, .pre_destroy = mem_cgroup_pre_destroy, .destroy = mem_cgroup_destroy, - .populate = mem_cgroup_populate, .can_attach = mem_cgroup_can_attach, .cancel_attach = mem_cgroup_cancel_attach, .attach = mem_cgroup_move_task, + .base_cftypes = mem_cgroup_files, .early_init = 0, .use_id = 1, + .__DEPRECATED_clear_css_refs = true, }; #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP diff --git a/mm/memory.c b/mm/memory.c index 1e77da6..e40f675 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1307,6 +1307,9 @@ static void unmap_single_vma(struct mmu_gather *tlb, if (end <= vma->vm_start) return; + if (vma->vm_file) + uprobe_munmap(vma, start, end); + if (unlikely(is_pfn_mapping(vma))) untrack_pfn_vma(vma, 0, 0); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index b195691..88f9422 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -607,27 +607,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, return first; } -/* Apply policy to a single VMA */ -static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new) -{ - int err = 0; - struct mempolicy *old = vma->vm_policy; - - pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n", - vma->vm_start, vma->vm_end, vma->vm_pgoff, - vma->vm_ops, vma->vm_file, - vma->vm_ops ? vma->vm_ops->set_policy : NULL); - - if (vma->vm_ops && vma->vm_ops->set_policy) - err = vma->vm_ops->set_policy(vma, new); - if (!err) { - mpol_get(new); - vma->vm_policy = new; - mpol_put(old); - } - return err; -} - /* Step 2: apply policy to a range and do splits. */ static int mbind_range(struct mm_struct *mm, unsigned long start, unsigned long end, struct mempolicy *new_pol) @@ -676,9 +655,23 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, if (err) goto out; } - err = policy_vma(vma, new_pol); - if (err) - goto out; + + /* + * Apply policy to a single VMA. The reference counting of + * policy for vma_policy linkages has already been handled by + * vma_merge and split_vma as necessary. If this is a shared + * policy then ->set_policy will increment the reference count + * for an sp node. + */ + pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n", + vma->vm_start, vma->vm_end, vma->vm_pgoff, + vma->vm_ops, vma->vm_file, + vma->vm_ops ? vma->vm_ops->set_policy : NULL); + if (vma->vm_ops && vma->vm_ops->set_policy) { + err = vma->vm_ops->set_policy(vma, new_pol); + if (err) + goto out; + } } out: @@ -1334,8 +1327,8 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode, * userid as the target process. */ tcred = __task_cred(task); - if (cred->euid != tcred->suid && cred->euid != tcred->uid && - cred->uid != tcred->suid && cred->uid != tcred->uid && + if (!uid_eq(cred->euid, tcred->suid) && !uid_eq(cred->euid, tcred->uid) && + !uid_eq(cred->uid, tcred->suid) && !uid_eq(cred->uid, tcred->uid) && !capable(CAP_SYS_NICE)) { rcu_read_unlock(); err = -EPERM; diff --git a/mm/migrate.c b/mm/migrate.c index 1107238..ab81d48 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1371,8 +1371,8 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, * userid as the target process. */ tcred = __task_cred(task); - if (cred->euid != tcred->suid && cred->euid != tcred->uid && - cred->uid != tcred->suid && cred->uid != tcred->uid && + if (!uid_eq(cred->euid, tcred->suid) && !uid_eq(cred->euid, tcred->uid) && + !uid_eq(cred->uid, tcred->suid) && !uid_eq(cred->uid, tcred->uid) && !capable(CAP_SYS_NICE)) { rcu_read_unlock(); err = -EPERM; @@ -30,6 +30,7 @@ #include <linux/perf_event.h> #include <linux/audit.h> #include <linux/khugepaged.h> +#include <linux/uprobes.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> @@ -546,8 +547,15 @@ again: remove_next = 1 + (end > next->vm_end); if (file) { mapping = file->f_mapping; - if (!(vma->vm_flags & VM_NONLINEAR)) + if (!(vma->vm_flags & VM_NONLINEAR)) { root = &mapping->i_mmap; + uprobe_munmap(vma, vma->vm_start, vma->vm_end); + + if (adjust_next) + uprobe_munmap(next, next->vm_start, + next->vm_end); + } + mutex_lock(&mapping->i_mmap_mutex); if (insert) { /* @@ -617,8 +625,16 @@ again: remove_next = 1 + (end > next->vm_end); if (mapping) mutex_unlock(&mapping->i_mmap_mutex); + if (root) { + uprobe_mmap(vma); + + if (adjust_next) + uprobe_mmap(next); + } + if (remove_next) { if (file) { + uprobe_munmap(next, next->vm_start, next->vm_end); fput(file); if (next->vm_flags & VM_EXECUTABLE) removed_exe_file_vma(mm); @@ -638,6 +654,8 @@ again: remove_next = 1 + (end > next->vm_end); goto again; } } + if (insert && file) + uprobe_mmap(insert); validate_mm(mm); @@ -1371,6 +1389,11 @@ out: mm->locked_vm += (len >> PAGE_SHIFT); } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK)) make_pages_present(addr, addr + len); + + if (file && uprobe_mmap(vma)) + /* matching probes but cannot insert */ + goto unmap_and_free_vma; + return addr; unmap_and_free_vma: @@ -2358,6 +2381,10 @@ int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) if ((vma->vm_flags & VM_ACCOUNT) && security_vm_enough_memory_mm(mm, vma_pages(vma))) return -ENOMEM; + + if (vma->vm_file && uprobe_mmap(vma)) + return -EINVAL; + vma_link(mm, vma, prev, rb_link, rb_parent); return 0; } @@ -2427,6 +2454,10 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, new_vma->vm_pgoff = pgoff; if (new_vma->vm_file) { get_file(new_vma->vm_file); + + if (uprobe_mmap(new_vma)) + goto out_free_mempol; + if (vma->vm_flags & VM_EXECUTABLE) added_exe_file_vma(mm); } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 46bf2ed5..9f09a1f 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -410,8 +410,8 @@ static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemas } pr_info("[%5d] %5d %5d %8lu %8lu %3u %3d %5d %s\n", - task->pid, task_uid(task), task->tgid, - task->mm->total_vm, get_mm_rss(task->mm), + task->pid, from_kuid(&init_user_ns, task_uid(task)), + task->tgid, task->mm->total_vm, get_mm_rss(task->mm), task_cpu(task), task->signal->oom_adj, task->signal->oom_score_adj, task->comm); task_unlock(task); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b7af568..1851df6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4763,12 +4763,12 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) for (i = 0; i < MAX_NR_ZONES; i++) { if (i == ZONE_MOVABLE) continue; - printk(" %-8s ", zone_names[i]); + printk(KERN_CONT " %-8s ", zone_names[i]); if (arch_zone_lowest_possible_pfn[i] == arch_zone_highest_possible_pfn[i]) - printk("empty\n"); + printk(KERN_CONT "empty\n"); else - printk("%0#10lx -> %0#10lx\n", + printk(KERN_CONT "%0#10lx -> %0#10lx\n", arch_zone_lowest_possible_pfn[i], arch_zone_highest_possible_pfn[i]); } @@ -2075,6 +2075,8 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, bool remount) { char *this_char, *value, *rest; + uid_t uid; + gid_t gid; while (options != NULL) { this_char = options; @@ -2134,15 +2136,21 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, } else if (!strcmp(this_char,"uid")) { if (remount) continue; - sbinfo->uid = simple_strtoul(value, &rest, 0); + uid = simple_strtoul(value, &rest, 0); if (*rest) goto bad_val; + sbinfo->uid = make_kuid(current_user_ns(), uid); + if (!uid_valid(sbinfo->uid)) + goto bad_val; } else if (!strcmp(this_char,"gid")) { if (remount) continue; - sbinfo->gid = simple_strtoul(value, &rest, 0); + gid = simple_strtoul(value, &rest, 0); if (*rest) goto bad_val; + sbinfo->gid = make_kgid(current_user_ns(), gid); + if (!gid_valid(sbinfo->gid)) + goto bad_val; } else if (!strcmp(this_char,"mpol")) { if (mpol_parse_str(value, &sbinfo->mpol, 1)) goto bad_val; @@ -2210,10 +2218,12 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes); if (sbinfo->mode != (S_IRWXUGO | S_ISVTX)) seq_printf(seq, ",mode=%03ho", sbinfo->mode); - if (sbinfo->uid != 0) - seq_printf(seq, ",uid=%u", sbinfo->uid); - if (sbinfo->gid != 0) - seq_printf(seq, ",gid=%u", sbinfo->gid); + if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID)) + seq_printf(seq, ",uid=%u", + from_kuid_munged(&init_user_ns, sbinfo->uid)); + if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID)) + seq_printf(seq, ",gid=%u", + from_kgid_munged(&init_user_ns, sbinfo->gid)); shmem_show_mpol(seq, sbinfo->mpol); return 0; } |