Diffstat (limited to 'mm')
-rw-r--r-- | mm/backing-dev.c    | 219
-rw-r--r-- | mm/hugetlb.c        |   6
-rw-r--r-- | mm/memcontrol.c     | 190
-rw-r--r-- | mm/migrate.c        |   9
-rw-r--r-- | mm/mmap.c           |  24
-rw-r--r-- | mm/nommu.c          |  23
-rw-r--r-- | mm/page-writeback.c |  77
-rw-r--r-- | mm/page_alloc.c     |  47
-rw-r--r-- | mm/readahead.c      |   8
-rw-r--r-- | mm/shmem.c          |   2
-rw-r--r-- | mm/slab.c           |  14
-rw-r--r-- | mm/slub.c           |  58
-rw-r--r-- | mm/sparse.c         |  19
-rw-r--r-- | mm/swap_state.c     |   2
-rw-r--r-- | mm/swapfile.c       |   6
-rw-r--r-- | mm/vmalloc.c        |   3
-rw-r--r-- | mm/vmscan.c         |  11
-rw-r--r-- | mm/vmstat.c         |   5
18 files changed, 547 insertions, 176 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index e8644b1..7c4f9e0 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -4,12 +4,229 @@
 #include <linux/fs.h>
 #include <linux/sched.h>
 #include <linux/module.h>
+#include <linux/writeback.h>
+#include <linux/device.h>
+
+
+static struct class *bdi_class;
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+static struct dentry *bdi_debug_root;
+
+static void bdi_debug_init(void)
+{
+	bdi_debug_root = debugfs_create_dir("bdi", NULL);
+}
+
+static int bdi_debug_stats_show(struct seq_file *m, void *v)
+{
+	struct backing_dev_info *bdi = m->private;
+	long background_thresh;
+	long dirty_thresh;
+	long bdi_thresh;
+
+	get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi);
+
+#define K(x) ((x) << (PAGE_SHIFT - 10))
+	seq_printf(m,
+		   "BdiWriteback:     %8lu kB\n"
+		   "BdiReclaimable:   %8lu kB\n"
+		   "BdiDirtyThresh:   %8lu kB\n"
+		   "DirtyThresh:      %8lu kB\n"
+		   "BackgroundThresh: %8lu kB\n",
+		   (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
+		   (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
+		   K(bdi_thresh),
+		   K(dirty_thresh),
+		   K(background_thresh));
+#undef K
+
+	return 0;
+}
+
+static int bdi_debug_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, bdi_debug_stats_show, inode->i_private);
+}
+
+static const struct file_operations bdi_debug_stats_fops = {
+	.open		= bdi_debug_stats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
+{
+	bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root);
+	bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir,
+					       bdi, &bdi_debug_stats_fops);
+}
+
+static void bdi_debug_unregister(struct backing_dev_info *bdi)
+{
+	debugfs_remove(bdi->debug_stats);
+	debugfs_remove(bdi->debug_dir);
+}
+#else
+static inline void bdi_debug_init(void)
+{
+}
+static inline void bdi_debug_register(struct backing_dev_info *bdi,
+				      const char *name)
+{
+}
+static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
+{
+}
+#endif
+
+static ssize_t read_ahead_kb_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	char *end;
+	unsigned long read_ahead_kb;
+	ssize_t ret = -EINVAL;
+
+	read_ahead_kb = simple_strtoul(buf, &end, 10);
+	if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) {
+		bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10);
+		ret = count;
+	}
+	return ret;
+}
+
+#define K(pages) ((pages) << (PAGE_SHIFT - 10))
+
+#define BDI_SHOW(name, expr)						\
+static ssize_t name##_show(struct device *dev,				\
+			   struct device_attribute *attr, char *page)	\
+{									\
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);		\
+									\
+	return snprintf(page, PAGE_SIZE-1, "%lld\n", (long long)expr);	\
+}
+
+BDI_SHOW(read_ahead_kb, K(bdi->ra_pages))
+
+static ssize_t min_ratio_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	char *end;
+	unsigned int ratio;
+	ssize_t ret = -EINVAL;
+
+	ratio = simple_strtoul(buf, &end, 10);
+	if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) {
+		ret = bdi_set_min_ratio(bdi, ratio);
+		if (!ret)
+			ret = count;
+	}
+	return ret;
+}
+BDI_SHOW(min_ratio, bdi->min_ratio)
+
+static ssize_t max_ratio_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	char *end;
+	unsigned int ratio;
+	ssize_t ret = -EINVAL;
+
+	ratio = simple_strtoul(buf, &end, 10);
+	if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) {
+		ret = bdi_set_max_ratio(bdi, ratio);
+		if (!ret)
+			ret = count;
+	}
+	return ret;
+}
+BDI_SHOW(max_ratio, bdi->max_ratio)
+
+#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
+
+static struct device_attribute bdi_dev_attrs[] = {
+	__ATTR_RW(read_ahead_kb),
+	__ATTR_RW(min_ratio),
+	__ATTR_RW(max_ratio),
+	__ATTR_NULL,
+};
+
+static __init int bdi_class_init(void)
+{
+	bdi_class = class_create(THIS_MODULE, "bdi");
+	bdi_class->dev_attrs = bdi_dev_attrs;
+	bdi_debug_init();
+	return 0;
+}
+
+postcore_initcall(bdi_class_init);
+
+int bdi_register(struct backing_dev_info *bdi, struct device *parent,
+		const char *fmt, ...)
+{
+	char *name;
+	va_list args;
+	int ret = 0;
+	struct device *dev;
+
+	va_start(args, fmt);
+	name = kvasprintf(GFP_KERNEL, fmt, args);
+	va_end(args);
+
+	if (!name)
+		return -ENOMEM;
+
+	dev = device_create(bdi_class, parent, MKDEV(0, 0), name);
+	if (IS_ERR(dev)) {
+		ret = PTR_ERR(dev);
+		goto exit;
+	}
+
+	bdi->dev = dev;
+	dev_set_drvdata(bdi->dev, bdi);
+	bdi_debug_register(bdi, name);
+
+exit:
+	kfree(name);
+	return ret;
+}
+EXPORT_SYMBOL(bdi_register);
+
+int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
+{
+	return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev));
+}
+EXPORT_SYMBOL(bdi_register_dev);
+
+void bdi_unregister(struct backing_dev_info *bdi)
+{
+	if (bdi->dev) {
+		bdi_debug_unregister(bdi);
+		device_unregister(bdi->dev);
+		bdi->dev = NULL;
+	}
+}
+EXPORT_SYMBOL(bdi_unregister);
 
 int bdi_init(struct backing_dev_info *bdi)
 {
 	int i;
 	int err;
 
+	bdi->dev = NULL;
+
+	bdi->min_ratio = 0;
+	bdi->max_ratio = 100;
+	bdi->max_prop_frac = PROP_FRAC_BASE;
+
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
 		err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0);
 		if (err)
@@ -33,6 +250,8 @@ void bdi_destroy(struct backing_dev_info *bdi)
 {
 	int i;
 
+	bdi_unregister(bdi);
+
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
 		percpu_counter_destroy(&bdi->bdi_stat[i]);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2c37c67..bbf953e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -199,7 +199,8 @@ static struct page *alloc_fresh_huge_page_node(int nid)
 	struct page *page;
 
 	page = alloc_pages_node(nid,
-		htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|__GFP_NOWARN,
+		htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
+						__GFP_REPEAT|__GFP_NOWARN,
 		HUGETLB_PAGE_ORDER);
 	if (page) {
 		if (arch_prepare_hugepage(page)) {
@@ -294,7 +295,8 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
 	}
 	spin_unlock(&hugetlb_lock);
 
-	page = alloc_pages(htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN,
+	page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
+					__GFP_REPEAT|__GFP_NOWARN,
 					HUGETLB_PAGE_ORDER);
 	spin_lock(&hugetlb_lock);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2e0bfc9..e46451e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -26,15 +26,18 @@
 #include <linux/backing-dev.h>
 #include <linux/bit_spinlock.h>
 #include <linux/rcupdate.h>
+#include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/spinlock.h>
 #include <linux/fs.h>
 #include <linux/seq_file.h>
+#include <linux/vmalloc.h>
 
 #include <asm/uaccess.h>
 
 struct cgroup_subsys mem_cgroup_subsys;
 static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
+static struct kmem_cache *page_cgroup_cache;
 
 /*
  * Statistics for memory cgroup.
@@ -45,6 +48,8 @@ enum mem_cgroup_stat_index {
 	 */
 	MEM_CGROUP_STAT_CACHE,	   /* # of pages charged as cache */
 	MEM_CGROUP_STAT_RSS,	   /* # of pages charged as rss */
+	MEM_CGROUP_STAT_PGPGIN_COUNT,	/* # of pages paged in */
+	MEM_CGROUP_STAT_PGPGOUT_COUNT,	/* # of pages paged out */
 
 	MEM_CGROUP_STAT_NSTATS,
 };
@@ -196,6 +201,13 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags,
 		__mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_CACHE, val);
 	else
 		__mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val);
+
+	if (charge)
+		__mem_cgroup_stat_add_safe(stat,
+				MEM_CGROUP_STAT_PGPGIN_COUNT, 1);
+	else
+		__mem_cgroup_stat_add_safe(stat,
+				MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
 }
 
 static struct mem_cgroup_per_zone *
@@ -236,26 +248,12 @@ static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
 				css);
 }
 
-static struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
+struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 {
 	return container_of(task_subsys_state(p, mem_cgroup_subsys_id),
 				struct mem_cgroup, css);
 }
 
-void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p)
-{
-	struct mem_cgroup *mem;
-
-	mem = mem_cgroup_from_task(p);
-	css_get(&mem->css);
-	mm->mem_cgroup = mem;
-}
-
-void mm_free_cgroup(struct mm_struct *mm)
-{
-	css_put(&mm->mem_cgroup->css);
-}
-
 static inline int page_cgroup_locked(struct page *page)
 {
 	return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
@@ -287,10 +285,10 @@ static void unlock_page_cgroup(struct page *page)
 	bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
 }
 
-static void __mem_cgroup_remove_list(struct page_cgroup *pc)
+static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
+			struct page_cgroup *pc)
 {
 	int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
-	struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
 
 	if (from)
 		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
@@ -301,10 +299,10 @@ static void __mem_cgroup_remove_list(struct page_cgroup *pc)
 	list_del_init(&pc->lru);
 }
 
-static void __mem_cgroup_add_list(struct page_cgroup *pc)
+static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
+				struct page_cgroup *pc)
 {
 	int to = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
-	struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
 
 	if (!to) {
 		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
@@ -476,6 +474,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 	int zid = zone_idx(z);
 	struct mem_cgroup_per_zone *mz;
 
+	BUG_ON(!mem_cont);
 	mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
 	if (active)
 		src = &mz->active_list;
@@ -560,7 +559,7 @@ retry:
 	}
 	unlock_page_cgroup(page);
 
-	pc = kzalloc(sizeof(struct page_cgroup), gfp_mask);
+	pc = kmem_cache_zalloc(page_cgroup_cache, gfp_mask);
 	if (pc == NULL)
 		goto err;
 
@@ -574,7 +573,7 @@ retry:
 		mm = &init_mm;
 
 	rcu_read_lock();
-	mem = rcu_dereference(mm->mem_cgroup);
+	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
 	/*
 	 * For every charge from the cgroup, increment reference count
 	 */
@@ -602,7 +601,6 @@ retry:
 			mem_cgroup_out_of_memory(mem, gfp_mask);
 			goto out;
 		}
-		congestion_wait(WRITE, HZ/10);
 	}
 
 	pc->ref_cnt = 1;
@@ -610,7 +608,7 @@ retry:
 	pc->page = page;
 	pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
 	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
-		pc->flags |= PAGE_CGROUP_FLAG_CACHE;
+		pc->flags = PAGE_CGROUP_FLAG_CACHE;
 
 	lock_page_cgroup(page);
 	if (page_get_page_cgroup(page)) {
@@ -622,14 +620,14 @@ retry:
 		 */
 		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		css_put(&mem->css);
-		kfree(pc);
+		kmem_cache_free(page_cgroup_cache, pc);
 		goto retry;
 	}
 	page_assign_page_cgroup(page, pc);
 
 	mz = page_cgroup_zoneinfo(pc);
 	spin_lock_irqsave(&mz->lru_lock, flags);
-	__mem_cgroup_add_list(pc);
+	__mem_cgroup_add_list(mz, pc);
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
 
 	unlock_page_cgroup(page);
@@ -637,7 +635,7 @@ done:
 	return 0;
 out:
 	css_put(&mem->css);
-	kfree(pc);
+	kmem_cache_free(page_cgroup_cache, pc);
 err:
 	return -ENOMEM;
 }
@@ -685,7 +683,7 @@ void mem_cgroup_uncharge_page(struct page *page)
 	if (--(pc->ref_cnt) == 0) {
 		mz = page_cgroup_zoneinfo(pc);
 		spin_lock_irqsave(&mz->lru_lock, flags);
-		__mem_cgroup_remove_list(pc);
+		__mem_cgroup_remove_list(mz, pc);
 		spin_unlock_irqrestore(&mz->lru_lock, flags);
 
 		page_assign_page_cgroup(page, NULL);
@@ -695,7 +693,7 @@ void mem_cgroup_uncharge_page(struct page *page)
 
 		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		css_put(&mem->css);
-		kfree(pc);
+		kmem_cache_free(page_cgroup_cache, pc);
 		return;
 	}
 
@@ -747,7 +745,7 @@ void mem_cgroup_page_migration(struct page *page, struct page *newpage)
 
 	mz = page_cgroup_zoneinfo(pc);
 	spin_lock_irqsave(&mz->lru_lock, flags);
-	__mem_cgroup_remove_list(pc);
+	__mem_cgroup_remove_list(mz, pc);
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
 
 	page_assign_page_cgroup(page, NULL);
@@ -759,7 +757,7 @@ void mem_cgroup_page_migration(struct page *page, struct page *newpage)
 
 	mz = page_cgroup_zoneinfo(pc);
 	spin_lock_irqsave(&mz->lru_lock, flags);
-	__mem_cgroup_add_list(pc);
+	__mem_cgroup_add_list(mz, pc);
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
 
 	unlock_page_cgroup(newpage);
@@ -853,13 +851,10 @@ static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
 	return 0;
 }
 
-static ssize_t mem_cgroup_read(struct cgroup *cont,
-			struct cftype *cft, struct file *file,
-			char __user *userbuf, size_t nbytes, loff_t *ppos)
+static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
 {
-	return res_counter_read(&mem_cgroup_from_cont(cont)->res,
-				cft->private, userbuf, nbytes, ppos,
-				NULL);
+	return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
+				    cft->private);
 }
 
 static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
@@ -871,27 +866,25 @@ static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
 				mem_cgroup_write_strategy);
 }
 
-static ssize_t mem_force_empty_write(struct cgroup *cont,
-				struct cftype *cft, struct file *file,
-				const char __user *userbuf,
-				size_t nbytes, loff_t *ppos)
+static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
 {
-	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
-	int ret = mem_cgroup_force_empty(mem);
-	if (!ret)
-		ret = nbytes;
-	return ret;
+	struct mem_cgroup *mem;
+
+	mem = mem_cgroup_from_cont(cont);
+	switch (event) {
+	case RES_MAX_USAGE:
+		res_counter_reset_max(&mem->res);
+		break;
+	case RES_FAILCNT:
+		res_counter_reset_failcnt(&mem->res);
+		break;
+	}
+	return 0;
 }
 
-/*
- * Note: This should be removed if cgroup supports write-only file.
- */
-static ssize_t mem_force_empty_read(struct cgroup *cont,
-				struct cftype *cft,
-				struct file *file, char __user *userbuf,
-				size_t nbytes, loff_t *ppos)
+static int mem_force_empty_write(struct cgroup *cont, unsigned int event)
 {
-	return -EINVAL;
+	return mem_cgroup_force_empty(mem_cgroup_from_cont(cont));
 }
 
 static const struct mem_cgroup_stat_desc {
@@ -900,11 +893,13 @@ static const struct mem_cgroup_stat_desc {
 } mem_cgroup_stat_desc[] = {
 	[MEM_CGROUP_STAT_CACHE] = { "cache", PAGE_SIZE, },
 	[MEM_CGROUP_STAT_RSS] = { "rss", PAGE_SIZE, },
+	[MEM_CGROUP_STAT_PGPGIN_COUNT] = {"pgpgin", 1, },
+	[MEM_CGROUP_STAT_PGPGOUT_COUNT] = {"pgpgout", 1, },
 };
 
-static int mem_control_stat_show(struct seq_file *m, void *arg)
+static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
+				 struct cgroup_map_cb *cb)
 {
-	struct cgroup *cont = m->private;
 	struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont);
 	struct mem_cgroup_stat *stat = &mem_cont->stat;
 	int i;
@@ -914,8 +909,7 @@ static int mem_control_stat_show(struct seq_file *m, void *arg)
 
 		val = mem_cgroup_read_stat(stat, i);
 		val *= mem_cgroup_stat_desc[i].unit;
-		seq_printf(m, "%s %lld\n", mem_cgroup_stat_desc[i].msg,
-				(long long)val);
+		cb->fill(cb, mem_cgroup_stat_desc[i].msg, val);
 	}
 	/* showing # of active pages */
 	{
@@ -925,52 +919,43 @@ static int mem_control_stat_show(struct seq_file *m, void *arg)
 				MEM_CGROUP_ZSTAT_INACTIVE);
 		active = mem_cgroup_get_all_zonestat(mem_cont,
 				MEM_CGROUP_ZSTAT_ACTIVE);
-		seq_printf(m, "active %ld\n", (active) * PAGE_SIZE);
-		seq_printf(m, "inactive %ld\n", (inactive) * PAGE_SIZE);
+		cb->fill(cb, "active", (active) * PAGE_SIZE);
+		cb->fill(cb, "inactive", (inactive) * PAGE_SIZE);
 	}
 	return 0;
 }
 
-static const struct file_operations mem_control_stat_file_operations = {
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-static int mem_control_stat_open(struct inode *unused, struct file *file)
-{
-	/* XXX __d_cont */
-	struct cgroup *cont = file->f_dentry->d_parent->d_fsdata;
-
-	file->f_op = &mem_control_stat_file_operations;
-	return single_open(file, mem_control_stat_show, cont);
-}
-
 static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "usage_in_bytes",
 		.private = RES_USAGE,
-		.read = mem_cgroup_read,
+		.read_u64 = mem_cgroup_read,
+	},
+	{
+		.name = "max_usage_in_bytes",
+		.private = RES_MAX_USAGE,
+		.trigger = mem_cgroup_reset,
+		.read_u64 = mem_cgroup_read,
 	},
 	{
 		.name = "limit_in_bytes",
 		.private = RES_LIMIT,
 		.write = mem_cgroup_write,
-		.read = mem_cgroup_read,
+		.read_u64 = mem_cgroup_read,
 	},
 	{
 		.name = "failcnt",
 		.private = RES_FAILCNT,
-		.read = mem_cgroup_read,
+		.trigger = mem_cgroup_reset,
+		.read_u64 = mem_cgroup_read,
 	},
 	{
 		.name = "force_empty",
-		.write = mem_force_empty_write,
-		.read = mem_force_empty_read,
+		.trigger = mem_force_empty_write,
 	},
 	{
 		.name = "stat",
-		.open = mem_control_stat_open,
+		.read_map = mem_control_stat_show,
 	},
 };
 
@@ -1010,6 +995,29 @@ static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 	kfree(mem->info.nodeinfo[node]);
 }
 
+static struct mem_cgroup *mem_cgroup_alloc(void)
+{
+	struct mem_cgroup *mem;
+
+	if (sizeof(*mem) < PAGE_SIZE)
+		mem = kmalloc(sizeof(*mem), GFP_KERNEL);
+	else
+		mem = vmalloc(sizeof(*mem));
+
+	if (mem)
+		memset(mem, 0, sizeof(*mem));
+	return mem;
+}
+
+static void mem_cgroup_free(struct mem_cgroup *mem)
+{
+	if (sizeof(*mem) < PAGE_SIZE)
+		kfree(mem);
+	else
+		vfree(mem);
+}
+
+
 static struct cgroup_subsys_state *
 mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 {
@@ -1018,17 +1026,15 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 
 	if (unlikely((cont->parent) == NULL)) {
 		mem = &init_mem_cgroup;
-		init_mm.mem_cgroup = mem;
-	} else
-		mem = kzalloc(sizeof(struct mem_cgroup), GFP_KERNEL);
-
-	if (mem == NULL)
-		return ERR_PTR(-ENOMEM);
+		page_cgroup_cache = KMEM_CACHE(page_cgroup, SLAB_PANIC);
+	} else {
+		mem = mem_cgroup_alloc();
+		if (!mem)
+			return ERR_PTR(-ENOMEM);
+	}
 
 	res_counter_init(&mem->res);
 
-	memset(&mem->info, 0, sizeof(mem->info));
-
 	for_each_node_state(node, N_POSSIBLE)
 		if (alloc_mem_cgroup_per_zone_info(mem, node))
 			goto free_out;
@@ -1038,7 +1044,7 @@ free_out:
 	for_each_node_state(node, N_POSSIBLE)
 		free_mem_cgroup_per_zone_info(mem, node);
 	if (cont->parent != NULL)
-		kfree(mem);
+		mem_cgroup_free(mem);
 	return ERR_PTR(-ENOMEM);
 }
 
@@ -1058,7 +1064,7 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss,
 	for_each_node_state(node, N_POSSIBLE)
 		free_mem_cgroup_per_zone_info(mem, node);
 
-	kfree(mem_cgroup_from_cont(cont));
+	mem_cgroup_free(mem_cgroup_from_cont(cont));
 }
 
 static int mem_cgroup_populate(struct cgroup_subsys *ss,
@@ -1098,10 +1104,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 	if (!thread_group_leader(p))
 		goto out;
 
-	css_get(&mem->css);
-	rcu_assign_pointer(mm->mem_cgroup, mem);
-	css_put(&old_mem->css);
-
 out:
 	mmput(mm);
 }
diff --git a/mm/migrate.c b/mm/migrate.c
index 4e0eccc..449d77d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -383,7 +383,14 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 
 	if (PageDirty(page)) {
 		clear_page_dirty_for_io(page);
-		set_page_dirty(newpage);
+		/*
+		 * Want to mark the page and the radix tree as dirty, and
+		 * redo the accounting that clear_page_dirty_for_io undid,
+		 * but we can't use set_page_dirty because that function
+		 * is actually a signal that all of the page has become dirty.
+		 * Whereas only part of our page may be dirty.
+		 */
+		__set_page_dirty_nobuffers(newpage);
 	}
 
 #ifdef CONFIG_SWAP
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -230,8 +230,11 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 	might_sleep();
 	if (vma->vm_ops && vma->vm_ops->close)
 		vma->vm_ops->close(vma);
-	if (vma->vm_file)
+	if (vma->vm_file) {
 		fput(vma->vm_file);
+		if (vma->vm_flags & VM_EXECUTABLE)
+			removed_exe_file_vma(vma->vm_mm);
+	}
 	mpol_put(vma_policy(vma));
 	kmem_cache_free(vm_area_cachep, vma);
 	return next;
@@ -623,8 +626,11 @@ again:			remove_next = 1 + (end > next->vm_end);
 		spin_unlock(&mapping->i_mmap_lock);
 
 	if (remove_next) {
-		if (file)
+		if (file) {
 			fput(file);
+			if (next->vm_flags & VM_EXECUTABLE)
+				removed_exe_file_vma(mm);
+		}
 		mm->map_count--;
 		mpol_put(vma_policy(next));
 		kmem_cache_free(vm_area_cachep, next);
@@ -1154,6 +1160,8 @@ munmap_back:
 		error = file->f_op->mmap(file, vma);
 		if (error)
 			goto unmap_and_free_vma;
+		if (vm_flags & VM_EXECUTABLE)
+			added_exe_file_vma(mm);
 	} else if (vm_flags & VM_SHARED) {
 		error = shmem_zero_setup(vma);
 		if (error)
@@ -1185,6 +1193,8 @@ munmap_back:
 		mpol_put(vma_policy(vma));
 		kmem_cache_free(vm_area_cachep, vma);
 		fput(file);
+		if (vm_flags & VM_EXECUTABLE)
+			removed_exe_file_vma(mm);
 	} else {
 		vma_link(mm, vma, prev, rb_link, rb_parent);
 		file = vma->vm_file;
@@ -1817,8 +1827,11 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	}
 	vma_set_policy(new, pol);
 
-	if (new->vm_file)
+	if (new->vm_file) {
 		get_file(new->vm_file);
+		if (vma->vm_flags & VM_EXECUTABLE)
+			added_exe_file_vma(mm);
+	}
 
 	if (new->vm_ops && new->vm_ops->open)
 		new->vm_ops->open(new);
@@ -2135,8 +2148,11 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		new_vma->vm_start = addr;
 		new_vma->vm_end = addr + len;
 		new_vma->vm_pgoff = pgoff;
-		if (new_vma->vm_file)
+		if (new_vma->vm_file) {
 			get_file(new_vma->vm_file);
+			if (vma->vm_flags & VM_EXECUTABLE)
+				added_exe_file_vma(mm);
+		}
 		if (new_vma->vm_ops && new_vma->vm_ops->open)
 			new_vma->vm_ops->open(new_vma);
 		vma_link(mm, new_vma, prev, rb_link, rb_parent);
diff --git a/mm/nommu.c b/mm/nommu.c
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -966,8 +966,13 @@ unsigned long do_mmap_pgoff(struct file *file,
 
 	INIT_LIST_HEAD(&vma->anon_vma_node);
 	atomic_set(&vma->vm_usage, 1);
-	if (file)
+	if (file) {
 		get_file(file);
+		if (vm_flags & VM_EXECUTABLE) {
+			added_exe_file_vma(current->mm);
+			vma->vm_mm = current->mm;
+		}
+	}
 	vma->vm_file	= file;
 	vma->vm_flags	= vm_flags;
 	vma->vm_start	= addr;
@@ -1022,8 +1027,11 @@ unsigned long do_mmap_pgoff(struct file *file,
 	up_write(&nommu_vma_sem);
 	kfree(vml);
 	if (vma) {
-		if (vma->vm_file)
+		if (vma->vm_file) {
 			fput(vma->vm_file);
+			if (vma->vm_flags & VM_EXECUTABLE)
+				removed_exe_file_vma(vma->vm_mm);
+		}
 		kfree(vma);
 	}
 	return ret;
@@ -1053,7 +1061,7 @@ EXPORT_SYMBOL(do_mmap_pgoff);
 /*
  * handle mapping disposal for uClinux
  */
-static void put_vma(struct vm_area_struct *vma)
+static void put_vma(struct mm_struct *mm, struct vm_area_struct *vma)
 {
 	if (vma) {
 		down_write(&nommu_vma_sem);
@@ -1075,8 +1083,11 @@ static void put_vma(struct vm_area_struct *vma)
 			realalloc -= kobjsize(vma);
 			askedalloc -= sizeof(*vma);
 
-			if (vma->vm_file)
+			if (vma->vm_file) {
 				fput(vma->vm_file);
+				if (vma->vm_flags & VM_EXECUTABLE)
+					removed_exe_file_vma(mm);
+			}
 
 			kfree(vma);
 		}
@@ -1113,7 +1124,7 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
  found:
 	vml = *parent;
 
-	put_vma(vml->vma);
+	put_vma(mm, vml->vma);
 
 	*parent = vml->next;
 	realalloc -= kobjsize(vml);
@@ -1158,7 +1169,7 @@ void exit_mmap(struct mm_struct * mm)
 
 		while ((tmp = mm->context.vmlist)) {
 			mm->context.vmlist = tmp->next;
-			put_vma(tmp->vma);
+			put_vma(mm, tmp->vma);
 
 			realalloc -= kobjsize(tmp);
 			askedalloc -= sizeof(*tmp);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 5e00f17..789b6ad 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -164,9 +164,20 @@ int dirty_ratio_handler(struct ctl_table *table, int write,
  */
 static inline void __bdi_writeout_inc(struct backing_dev_info *bdi)
 {
-	__prop_inc_percpu(&vm_completions, &bdi->completions);
+	__prop_inc_percpu_max(&vm_completions, &bdi->completions,
+			      bdi->max_prop_frac);
 }
 
+void bdi_writeout_inc(struct backing_dev_info *bdi)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__bdi_writeout_inc(bdi);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(bdi_writeout_inc);
+
 static inline void task_dirty_inc(struct task_struct *tsk)
 {
 	prop_inc_single(&vm_dirties, &tsk->dirties);
@@ -200,7 +211,8 @@ clip_bdi_dirty_limit(struct backing_dev_info *bdi, long dirty, long *pbdi_dirty)
 	avail_dirty = dirty -
 		(global_page_state(NR_FILE_DIRTY) +
 		 global_page_state(NR_WRITEBACK) +
-		 global_page_state(NR_UNSTABLE_NFS));
+		 global_page_state(NR_UNSTABLE_NFS) +
+		 global_page_state(NR_WRITEBACK_TEMP));
 
 	if (avail_dirty < 0)
 		avail_dirty = 0;
@@ -243,6 +255,55 @@ static void task_dirty_limit(struct task_struct *tsk, long *pdirty)
 }
 
 /*
+ *
+ */
+static DEFINE_SPINLOCK(bdi_lock);
+static unsigned int bdi_min_ratio;
+
+int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
+{
+	int ret = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&bdi_lock, flags);
+	if (min_ratio > bdi->max_ratio) {
+		ret = -EINVAL;
+	} else {
+		min_ratio -= bdi->min_ratio;
+		if (bdi_min_ratio + min_ratio < 100) {
+			bdi_min_ratio += min_ratio;
+			bdi->min_ratio += min_ratio;
+		} else {
+			ret = -EINVAL;
+		}
+	}
+	spin_unlock_irqrestore(&bdi_lock, flags);
+
+	return ret;
+}
+
+int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	if (max_ratio > 100)
+		return -EINVAL;
+
+	spin_lock_irqsave(&bdi_lock, flags);
+	if (bdi->min_ratio > max_ratio) {
+		ret = -EINVAL;
+	} else {
+		bdi->max_ratio = max_ratio;
+		bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
+	}
+	spin_unlock_irqrestore(&bdi_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(bdi_set_max_ratio);
+
+/*
  * Work out the current dirty-memory clamping and background writeout
  * thresholds.
  *
@@ -300,7 +361,7 @@ static unsigned long determine_dirtyable_memory(void)
 	return x + 1;	/* Ensure that we never return 0 */
 }
 
-static void
+void
 get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
 		 struct backing_dev_info *bdi)
 {
@@ -330,7 +391,7 @@ get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
 	*pdirty = dirty;
 
 	if (bdi) {
-		u64 bdi_dirty = dirty;
+		u64 bdi_dirty;
 		long numerator, denominator;
 
 		/*
@@ -338,8 +399,12 @@ get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
 		 */
 		bdi_writeout_fraction(bdi, &numerator, &denominator);
 
+		bdi_dirty = (dirty * (100 - bdi_min_ratio)) / 100;
 		bdi_dirty *= numerator;
 		do_div(bdi_dirty, denominator);
+		bdi_dirty += (dirty * bdi->min_ratio) / 100;
+		if (bdi_dirty > (dirty * bdi->max_ratio) / 100)
+			bdi_dirty = dirty * bdi->max_ratio / 100;
 
 		*pbdi_dirty = bdi_dirty;
 		clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty);
@@ -1192,7 +1257,7 @@ int test_clear_page_writeback(struct page *page)
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
-			if (bdi_cap_writeback_dirty(bdi)) {
+			if (bdi_cap_account_writeback(bdi)) {
 				__dec_bdi_stat(bdi, BDI_WRITEBACK);
 				__bdi_writeout_inc(bdi);
 			}
@@ -1221,7 +1286,7 @@ int test_set_page_writeback(struct page *page)
 			radix_tree_tag_set(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
-			if (bdi_cap_writeback_dirty(bdi))
+			if (bdi_cap_account_writeback(bdi))
 				__inc_bdi_stat(bdi, BDI_WRITEBACK);
 		}
 		if (!PageDirty(page))
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d1cf4f0..bdd5c43 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -45,6 +45,7 @@
 #include <linux/fault-inject.h>
 #include <linux/page-isolation.h>
 #include <linux/memcontrol.h>
+#include <linux/debugobjects.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -532,8 +533,11 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	if (reserved)
 		return;
 
-	if (!PageHighMem(page))
+	if (!PageHighMem(page)) {
 		debug_check_no_locks_freed(page_address(page),PAGE_SIZE<<order);
+		debug_check_no_obj_freed(page_address(page),
+					 PAGE_SIZE << order);
+	}
 	arch_free_page(page, order);
 	kernel_map_pages(page, 1 << order, 0);
 
@@ -995,8 +999,10 @@ static void free_hot_cold_page(struct page *page, int cold)
 	if (free_pages_check(page))
 		return;
 
-	if (!PageHighMem(page))
+	if (!PageHighMem(page)) {
 		debug_check_no_locks_freed(page_address(page), PAGE_SIZE);
+		debug_check_no_obj_freed(page_address(page), PAGE_SIZE);
+	}
 	arch_free_page(page, 0);
 	kernel_map_pages(page, 1, 0);
 
@@ -1461,7 +1467,8 @@ __alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
 	struct task_struct *p = current;
 	int do_retry;
 	int alloc_flags;
-	int did_some_progress;
+	unsigned long did_some_progress;
+	unsigned long pages_reclaimed = 0;
 
 	might_sleep_if(wait);
 
@@ -1611,14 +1618,26 @@ nofail_alloc:
 	 * Don't let big-order allocations loop unless the caller explicitly
 	 * requests that.  Wait for some write requests to complete then retry.
 	 *
-	 * In this implementation, __GFP_REPEAT means __GFP_NOFAIL for order
-	 * <= 3, but that may not be true in other implementations.
+	 * In this implementation, order <= PAGE_ALLOC_COSTLY_ORDER
+	 * means __GFP_NOFAIL, but that may not be true in other
+	 * implementations.
+	 *
+	 * For order > PAGE_ALLOC_COSTLY_ORDER, if __GFP_REPEAT is
+	 * specified, then we retry until we no longer reclaim any pages
+	 * (above), or we've reclaimed an order of pages at least as
+	 * large as the allocation's order. In both cases, if the
+	 * allocation still fails, we stop retrying.
 	 */
+	pages_reclaimed += did_some_progress;
 	do_retry = 0;
 	if (!(gfp_mask & __GFP_NORETRY)) {
-		if ((order <= PAGE_ALLOC_COSTLY_ORDER) ||
-						(gfp_mask & __GFP_REPEAT))
+		if (order <= PAGE_ALLOC_COSTLY_ORDER) {
 			do_retry = 1;
+		} else {
+			if (gfp_mask & __GFP_REPEAT &&
+				pages_reclaimed < (1 << order))
+					do_retry = 1;
+		}
 		if (gfp_mask & __GFP_NOFAIL)
 			do_retry = 1;
 	}
@@ -2524,7 +2543,9 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 	struct page *page;
 	unsigned long end_pfn = start_pfn + size;
 	unsigned long pfn;
+	struct zone *z;
 
+	z = &NODE_DATA(nid)->node_zones[zone];
 	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
 		/*
 		 * There can be holes in boot-time mem_map[]s
@@ -2542,7 +2563,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		init_page_count(page);
 		reset_page_mapcount(page);
 		SetPageReserved(page);
-
 		/*
 		 * Mark the block movable so that blocks are reserved for
 		 * movable at startup. This will force kernel allocations
@@ -2551,8 +2571,15 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		 * kernel allocations are made. Later some blocks near
 		 * the start are marked MIGRATE_RESERVE by
 		 * setup_zone_migrate_reserve()
+		 *
+		 * bitmap is created for zone's valid pfn range. but memmap
+		 * can be created for invalid pages (for alignment)
+		 * check here not to call set_pageblock_migratetype() against
+		 * pfn out of zone.
 		 */
-		if ((pfn & (pageblock_nr_pages-1)))
+		if ((z->zone_start_pfn <= pfn)
+		    && (pfn < z->zone_start_pfn + z->spanned_pages)
+		    && !(pfn & (pageblock_nr_pages - 1)))
 			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
 
 		INIT_LIST_HEAD(&page->lru);
@@ -4464,6 +4491,8 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
 	pfn = page_to_pfn(page);
 	bitmap = get_pageblock_bitmap(zone, pfn);
 	bitidx = pfn_to_bitidx(zone, pfn);
+	VM_BUG_ON(pfn < zone->zone_start_pfn);
+	VM_BUG_ON(pfn >= zone->zone_start_pfn + zone->spanned_pages);
 
 	for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
 		if (flags & value)
diff --git a/mm/readahead.c b/mm/readahead.c
index 8762e89..d8723a5 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -235,7 +235,13 @@ unsigned long max_sane_readahead(unsigned long nr)
 
 static int __init readahead_init(void)
 {
-	return bdi_init(&default_backing_dev_info);
+	int err;
+
+	err = bdi_init(&default_backing_dev_info);
+	if (!err)
+		bdi_register(&default_backing_dev_info, NULL, "default");
+
+	return err;
 }
 
 subsys_initcall(readahead_init);
diff --git a/mm/shmem.c b/mm/shmem.c
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -201,7 +201,7 @@ static struct vm_operations_struct shmem_vm_ops;
 static struct backing_dev_info shmem_backing_dev_info  __read_mostly = {
 	.ra_pages	= 0,	/* No readahead */
-	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
 	.unplug_io_fn	= default_unplug_io_fn,
 };
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -110,6 +110,7 @@
 #include	<linux/fault-inject.h>
 #include	<linux/rtmutex.h>
 #include	<linux/reciprocal_div.h>
+#include	<linux/debugobjects.h>
 
 #include	<asm/cacheflush.h>
 #include	<asm/tlbflush.h>
@@ -174,12 +175,14 @@
 			 SLAB_CACHE_DMA | \
 			 SLAB_STORE_USER | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
-			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
+			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
+			 SLAB_DEBUG_OBJECTS)
 #else
 # define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \
 			 SLAB_CACHE_DMA | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
-			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
+			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
+			 SLAB_DEBUG_OBJECTS)
 #endif
 
 /*
@@ -858,7 +861,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
 	*left_over = slab_size - nr_objs*buffer_size - mgmt_size;
 }
 
-#define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg)
+#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)
 
 static void __slab_error(const char *function, struct kmem_cache *cachep,
 			char *msg)
@@ -2153,7 +2156,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	 */
 	if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
 	    size > KMALLOC_MAX_SIZE) {
-		printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__,
+		printk(KERN_ERR "%s: Early error in slab %s\n", __func__,
 				name);
 		BUG();
 	}
@@ -3760,6 +3763,8 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
 
 	local_irq_save(flags);
 	debug_check_no_locks_freed(objp, obj_size(cachep));
+	if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
+		debug_check_no_obj_freed(objp, obj_size(cachep));
 	__cache_free(cachep, objp);
 	local_irq_restore(flags);
 }
@@ -3785,6 +3790,7 @@ void kfree(const void *objp)
 	kfree_debugcheck(objp);
 	c = virt_to_cache(objp);
 	debug_check_no_locks_freed(objp, obj_size(c));
+	debug_check_no_obj_freed(objp, obj_size(c));
 	__cache_free(c, (void *)objp);
 	local_irq_restore(flags);
 }
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -19,8 +19,10 @@
 #include <linux/cpuset.h>
 #include <linux/mempolicy.h>
 #include <linux/ctype.h>
+#include <linux/debugobjects.h>
 #include <linux/kallsyms.h>
 #include <linux/memory.h>
+#include <linux/math64.h>
 
 /*
  * Lock order:
@@ -215,7 +217,7 @@ struct track {
 
 enum track_item { TRACK_ALLOC, TRACK_FREE };
 
-#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
+#ifdef CONFIG_SLUB_DEBUG
 static int sysfs_slab_add(struct kmem_cache *);
 static int sysfs_slab_alias(struct kmem_cache *, const char *);
 static void sysfs_slab_remove(struct kmem_cache *);
@@ -812,7 +814,8 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
 	return search == NULL;
 }
 
-static void trace(struct kmem_cache *s, struct page *page, void *object, int alloc)
+static void trace(struct kmem_cache *s, struct page *page, void *object,
+								int alloc)
 {
 	if (s->flags & SLAB_TRACE) {
 		printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
@@ -1265,8 +1268,7 @@ static void add_partial(struct kmem_cache_node *n,
 	spin_unlock(&n->list_lock);
 }
 
-static void remove_partial(struct kmem_cache *s,
-						struct page *page)
+static void remove_partial(struct kmem_cache *s, struct page *page)
 {
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
 
@@ -1281,7 +1283,8 @@ static void remove_partial(struct kmem_cache *s,
  *
  * Must hold list_lock.
  */
-static inline int lock_and_freeze_slab(struct kmem_cache_node *n, struct page *page)
+static inline int lock_and_freeze_slab(struct kmem_cache_node *n,
+							struct page *page)
 {
 	if (slab_trylock(page)) {
 		list_del(&page->lru);
@@ -1418,8 +1421,8 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 			 * so that the others get filled first. That way the
 			 * size of the partial list stays small.
 			 *
-			 * kmem_cache_shrink can reclaim any empty slabs from the
-			 * partial list.
+			 * kmem_cache_shrink can reclaim any empty slabs from
+			 * the partial list.
 			 */
 			add_partial(n, page, 1);
 			slab_unlock(page);
@@ -1747,6 +1750,8 @@ static __always_inline void slab_free(struct kmem_cache *s,
 	local_irq_save(flags);
 	c = get_cpu_slab(s, smp_processor_id());
 	debug_check_no_locks_freed(object, c->objsize);
+	if (!(s->flags & SLAB_DEBUG_OBJECTS))
+		debug_check_no_obj_freed(object, s->objsize);
 	if (likely(page == c->page && c->node >= 0)) {
 		object[c->offset] = c->freelist;
 		c->freelist = object;
@@ -2905,7 +2910,7 @@ static int slab_mem_going_online_callback(void *arg)
 		return 0;
 
 	/*
-	 * We are bringing a node online. No memory is availabe yet. We must
+	 * We are bringing a node online. No memory is available yet. We must
 	 * allocate a kmem_cache_node structure in order to bring the node
 	 * online.
 	 */
@@ -2978,7 +2983,7 @@ void __init kmem_cache_init(void)
 	kmalloc_caches[0].refcount = -1;
 	caches++;
 
-	hotplug_memory_notifier(slab_memory_callback, 1);
+	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
 #endif
 
 	/* Able to allocate the per node structures */
@@ -3242,7 +3247,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	return slab_alloc(s, gfpflags, node, caller);
 }
 
-#if (defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)) || defined(CONFIG_SLABINFO)
+#ifdef CONFIG_SLUB_DEBUG
 static unsigned long count_partial(struct kmem_cache_node *n,
 					int (*get_count)(struct page *))
 {
@@ -3271,9 +3276,7 @@ static int count_free(struct page *page)
 {
 	return page->objects - page->inuse;
 }
-#endif
 
-#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
 static int validate_slab(struct kmem_cache *s, struct page *page,
 						unsigned long *map)
 {
@@ -3618,12 +3621,10 @@ static int list_locations(struct kmem_cache *s, char *buf,
 			len += sprintf(buf + len, "<not-available>");
 
 		if (l->sum_time != l->min_time) {
-			unsigned long remainder;
-
 			len += sprintf(buf + len, " age=%ld/%ld/%ld",
-			l->min_time,
-			div_long_long_rem(l->sum_time, l->count, &remainder),
-			l->max_time);
+				l->min_time,
+				(long)div_u64(l->sum_time, l->count),
+				l->max_time);
 		} else
 			len += sprintf(buf + len, " age=%ld",
 				l->min_time);
@@ -3810,7 +3811,12 @@ SLAB_ATTR_RO(objs_per_slab);
 static ssize_t order_store(struct kmem_cache *s,
 				const char *buf, size_t length)
 {
-	int order = simple_strtoul(buf, NULL, 10);
+	unsigned long order;
+	int err;
+
+	err = strict_strtoul(buf, 10, &order);
+	if (err)
+		return err;
 
 	if (order > slub_max_order || order < slub_min_order)
 		return -EINVAL;
@@ -4063,10 +4069,16 @@ static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
 static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
 				const char *buf, size_t length)
 {
-	int n = simple_strtoul(buf, NULL, 10);
+	unsigned long ratio;
+	int err;
+
+	err = strict_strtoul(buf, 10, &ratio);
+	if (err)
+		return err;
+
+	if (ratio < 100)
+		s->remote_node_defrag_ratio = ratio * 10;
 
-	if (n < 100)
-		s->remote_node_defrag_ratio = n * 10;
 	return length;
 }
 SLAB_ATTR(remote_node_defrag_ratio);
@@ -4423,8 +4435,8 @@ __initcall(slab_sysfs_init);
  */
 #ifdef CONFIG_SLABINFO
 
-ssize_t slabinfo_write(struct file *file, const char __user * buffer,
-                       size_t count, loff_t *ppos)
+ssize_t slabinfo_write(struct file *file, const char __user *buffer,
+		       size_t count, loff_t *ppos)
 {
 	return -EINVAL;
 }
diff --git a/mm/sparse.c b/mm/sparse.c
index dff71f1..36511c7 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -250,29 +250,18 @@ static unsigned long *__kmalloc_section_usemap(void)
 
 static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum)
 {
-	unsigned long *usemap, section_nr;
+	unsigned long *usemap;
 	struct mem_section *ms = __nr_to_section(pnum);
 	int nid = sparse_early_nid(ms);
-	struct pglist_data *pgdat = NODE_DATA(nid);
 
-	/*
-	 * Usemap's page can't be freed until freeing other sections
-	 * which use it. And, Pgdat has same feature.
-	 * If section A has pgdat and section B has usemap for other
-	 * sections (includes section A), both sections can't be removed,
-	 * because there is the dependency each other.
-	 * To solve above issue, this collects all usemap on the same section
-	 * which has pgdat.
-	 */
-	section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
-	usemap = alloc_bootmem_section(usemap_size(), section_nr);
+	usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
 	if (usemap)
 		return usemap;
 
 	/* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */
 	nid = 0;
 
-	printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__);
+	printk(KERN_WARNING "%s: allocation failed\n", __func__);
 	return NULL;
 }
 
@@ -302,7 +291,7 @@ struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
 		return map;
 
 	printk(KERN_ERR "%s: sparsemem memory map backing failed "
-			"some memory will not be available.\n", __FUNCTION__);
+			"some memory will not be available.\n", __func__);
 	ms->section_mem_map = 0;
 	return NULL;
 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 50757ee..d8aadaf 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -33,7 +33,7 @@ static const struct address_space_operations swap_aops = {
 };
 
 static struct backing_dev_info swap_backing_dev_info = {
-	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
 	.unplug_io_fn	= swap_unplug_io_fn,
 };
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 67051be..bd1bb59 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1426,11 +1426,7 @@ static const struct file_operations proc_swaps_operations = {
 
 static int __init procswaps_init(void)
 {
-	struct proc_dir_entry *entry;
-
-	entry = create_proc_entry("swaps", 0, NULL);
-	if (entry)
-		entry->proc_fops = &proc_swaps_operations;
+	proc_create("swaps", 0, NULL, &proc_swaps_operations);
 	return 0;
 }
 __initcall(procswaps_init);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index e33e0ae..6e45b0f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -15,6 +15,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/seq_file.h>
+#include <linux/debugobjects.h>
 #include <linux/vmalloc.h>
 #include <linux/kallsyms.h>
 
@@ -394,6 +395,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
 	}
 
 	debug_check_no_locks_freed(addr, area->size);
+	debug_check_no_obj_freed(addr, area->size);
 
 	if (deallocate_pages) {
 		int i;
@@ -545,6 +547,7 @@ void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
  * @gfp_mask:	flags for the page level allocator
  * @prot:	protection mask for the allocated pages
  * @node:	node to use for allocation or -1
+ * @caller:	caller's return address
  *
  * Allocate enough pages to cover @size from the page level
 * allocator with @gfp_mask flags.  Map them into contiguous
diff --git a/mm/vmscan.c b/mm/vmscan.c
index eceac9f..9a29901 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -191,7 +191,7 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
 		shrinker->nr += delta;
 		if (shrinker->nr < 0) {
 			printk(KERN_ERR "%s: nr=%ld\n",
-					__FUNCTION__, shrinker->nr);
+					__func__, shrinker->nr);
 			shrinker->nr = max_pass;
 		}
 
@@ -339,7 +339,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 		if (PagePrivate(page)) {
 			if (try_to_free_buffers(page)) {
 				ClearPageDirty(page);
-				printk("%s: orphaned page\n", __FUNCTION__);
+				printk("%s: orphaned page\n", __func__);
 				return PAGE_CLEAN;
 			}
 		}
@@ -1299,6 +1299,9 @@ static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
 * hope that some of these pages can be written.  But if the allocating task
 * holds filesystem locks which prevent writeout this might not work, and the
 * allocation attempt will fail.
+ *
+ * returns:	0, if no pages reclaimed
+ *		else, the number of pages reclaimed
 */
static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
					struct scan_control *sc)
@@ -1347,7 +1350,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 		}
 		total_scanned += sc->nr_scanned;
 		if (nr_reclaimed >= sc->swap_cluster_max) {
-			ret = 1;
+			ret = nr_reclaimed;
 			goto out;
 		}
 
@@ -1370,7 +1373,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	}
 	/* top priority shrink_caches still had more to do? don't OOM, then */
 	if (!sc->all_unreclaimable && scan_global_lru(sc))
-		ret = 1;
+		ret = nr_reclaimed;
 out:
 	/*
 	 * Now that we've scanned all the zones at this priority level, note
diff --git a/mm/vmstat.c b/mm/vmstat.c
index ec6035e..1a32130 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -548,6 +548,10 @@ static int pagetypeinfo_show(struct seq_file *m, void *arg)
 {
 	pg_data_t *pgdat = (pg_data_t *)arg;
 
+	/* check memoryless node */
+	if (!node_state(pgdat->node_id, N_HIGH_MEMORY))
+		return 0;
+
 	seq_printf(m, "Page block order: %d\n", pageblock_order);
 	seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
 	seq_putc(m, '\n');
@@ -608,6 +612,7 @@ static const char * const vmstat_text[] = {
 	"nr_unstable",
 	"nr_bounce",
 	"nr_vmscan_write",
+	"nr_writeback_temp",
 
 #ifdef CONFIG_NUMA
 	"numa_hit",