diff options
-rw-r--r-- | Documentation/sysctl/vm.txt | 14 | ||||
-rw-r--r-- | include/linux/mmzone.h | 6 | ||||
-rw-r--r-- | include/linux/swap.h | 1 | ||||
-rw-r--r-- | include/linux/sysctl.h | 2 | ||||
-rw-r--r-- | kernel/sysctl.c | 11 | ||||
-rw-r--r-- | mm/page_alloc.c | 22 | ||||
-rw-r--r-- | mm/vmscan.c | 27 |
7 files changed, 69 insertions, 14 deletions
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 86754eb..7cee902 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -28,6 +28,7 @@ Currently, these files are in /proc/sys/vm: - block_dump - drop-caches - zone_reclaim_mode +- min_unmapped_ratio - panic_on_oom ============================================================== @@ -168,6 +169,19 @@ in all nodes of the system. ============================================================= +min_unmapped_ratio: + +This is available only on NUMA kernels. + +A percentage of the file backed pages in each zone. Zone reclaim will only +occur if more than this percentage of pages are file backed and unmapped. +This is to insure that a minimal amount of local pages is still available for +file I/O even if the node is overallocated. + +The default is 1 percent. + +============================================================= + panic_on_oom This enables or disables panic on out-of-memory feature. If this is set to 1, diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 27e748e..656b588 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -150,6 +150,10 @@ struct zone { unsigned long lowmem_reserve[MAX_NR_ZONES]; #ifdef CONFIG_NUMA + /* + * zone reclaim becomes active if more unmapped pages exist. + */ + unsigned long min_unmapped_ratio; struct per_cpu_pageset *pageset[NR_CPUS]; #else struct per_cpu_pageset pageset[NR_CPUS]; @@ -414,6 +418,8 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, + struct file *, void __user *, size_t *, loff_t *); #include <linux/topology.h> /* Returns the number of the current Node. */ diff --git a/include/linux/swap.h b/include/linux/swap.h index cf6ca6e..5e59184 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -189,6 +189,7 @@ extern long vm_total_pages; #ifdef CONFIG_NUMA extern int zone_reclaim_mode; +extern int sysctl_min_unmapped_ratio; extern int zone_reclaim(struct zone *, gfp_t, unsigned int); #else #define zone_reclaim_mode 0 diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 46e4d8f..e4b1a4d 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -188,7 +188,7 @@ enum VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */ VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */ - VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */ + VM_MIN_UNMAPPED=32, /* Set min percent of unmapped pages */ VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ VM_VDSO_ENABLED=34, /* map VDSO into new processes? */ }; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 99a58f2..362a0cc 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -932,6 +932,17 @@ static ctl_table vm_table[] = { .strategy = &sysctl_intvec, .extra1 = &zero, }, + { + .ctl_name = VM_MIN_UNMAPPED, + .procname = "min_unmapped_ratio", + .data = &sysctl_min_unmapped_ratio, + .maxlen = sizeof(sysctl_min_unmapped_ratio), + .mode = 0644, + .proc_handler = &sysctl_min_unmapped_ratio_sysctl_handler, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one_hundred, + }, #endif #ifdef CONFIG_X86_32 { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3e792a5..54a4f53 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2005,6 +2005,10 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, zone->spanned_pages = size; zone->present_pages = realsize; +#ifdef CONFIG_NUMA + zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio) + / 100; +#endif zone->name = zone_names[j]; spin_lock_init(&zone->lock); spin_lock_init(&zone->lru_lock); @@ -2298,6 +2302,24 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write, return 0; } +#ifdef CONFIG_NUMA +int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *length, loff_t *ppos) +{ + struct zone *zone; + int rc; + + rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); + if (rc) + return rc; + + for_each_zone(zone) + zone->min_unmapped_ratio = (zone->present_pages * + sysctl_min_unmapped_ratio) / 100; + return 0; +} +#endif + /* * lowmem_reserve_ratio_sysctl_handler - just a wrapper around * proc_dointvec() so that we can call setup_per_zone_lowmem_reserve() diff --git a/mm/vmscan.c b/mm/vmscan.c index ff2ebe9..5d4c4d0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1503,10 +1503,6 @@ module_init(kswapd_init) * * If non-zero call zone_reclaim when the number of free pages falls below * the watermarks. - * - * In the future we may add flags to the mode. However, the page allocator - * should only have to check that zone_reclaim_mode != 0 before calling - * zone_reclaim(). */ int zone_reclaim_mode __read_mostly; @@ -1524,6 +1520,12 @@ int zone_reclaim_mode __read_mostly; #define ZONE_RECLAIM_PRIORITY 4 /* + * Percentage of pages in a zone that must be unmapped for zone_reclaim to + * occur. + */ +int sysctl_min_unmapped_ratio = 1; + +/* * Try to free up some pages from this zone through reclaim. */ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) @@ -1590,18 +1592,17 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) int node_id; /* - * Do not reclaim if there are not enough reclaimable pages in this - * zone that would satify this allocations. + * Zone reclaim reclaims unmapped file backed pages. * - * All unmapped pagecache pages are reclaimable. - * - * Both counters may be temporarily off a bit so we use - * SWAP_CLUSTER_MAX as the boundary. It may also be good to - * leave a few frequently used unmapped pagecache pages around. + * A small portion of unmapped file backed pages is needed for + * file I/O otherwise pages read by file I/O will be immediately + * thrown out if the zone is overallocated. So we do not reclaim + * if less than a specified percentage of the zone is used by + * unmapped file backed pages. */ if (zone_page_state(zone, NR_FILE_PAGES) - - zone_page_state(zone, NR_FILE_MAPPED) < SWAP_CLUSTER_MAX) - return 0; + zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio) + return 0; /* * Avoid concurrent zone reclaims, do not reclaim in a zone that does |