summaryrefslogtreecommitdiff
path: root/mm/slab.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/slab.c')
-rw-r--r--mm/slab.c179
1 files changed, 81 insertions, 98 deletions
diff --git a/mm/slab.c b/mm/slab.c
index 5920a41..b344e67 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -137,6 +137,7 @@
/* Shouldn't this be in a header file somewhere? */
#define BYTES_PER_WORD sizeof(void *)
+#define REDZONE_ALIGN max(BYTES_PER_WORD, __alignof__(unsigned long long))
#ifndef cache_line_size
#define cache_line_size() L1_CACHE_BYTES
@@ -148,10 +149,11 @@
* Usually, the kmalloc caches are cache_line_size() aligned, except when
* DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
* Some archs want to perform DMA into kmalloc caches and need a guaranteed
- * alignment larger than BYTES_PER_WORD. ARCH_KMALLOC_MINALIGN allows that.
- * Note that this flag disables some debug features.
+ * alignment larger than the alignment of a 64-bit integer.
+ * ARCH_KMALLOC_MINALIGN allows that.
+ * Note that increasing this value may disable some debug features.
*/
-#define ARCH_KMALLOC_MINALIGN 0
+#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif
#ifndef ARCH_SLAB_MINALIGN
@@ -408,9 +410,6 @@ struct kmem_cache {
/* constructor func */
void (*ctor) (void *, struct kmem_cache *, unsigned long);
- /* de-constructor func */
- void (*dtor) (void *, struct kmem_cache *, unsigned long);
-
/* 5) cache creation/removal */
const char *name;
struct list_head next;
@@ -536,19 +535,22 @@ static int obj_size(struct kmem_cache *cachep)
return cachep->obj_size;
}
-static unsigned long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
+static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
{
BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
- return (unsigned long*) (objp+obj_offset(cachep)-BYTES_PER_WORD);
+ return (unsigned long long*) (objp + obj_offset(cachep) -
+ sizeof(unsigned long long));
}
-static unsigned long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
+static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
{
BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
if (cachep->flags & SLAB_STORE_USER)
- return (unsigned long *)(objp + cachep->buffer_size -
- 2 * BYTES_PER_WORD);
- return (unsigned long *)(objp + cachep->buffer_size - BYTES_PER_WORD);
+ return (unsigned long long *)(objp + cachep->buffer_size -
+ sizeof(unsigned long long) -
+ REDZONE_ALIGN);
+ return (unsigned long long *) (objp + cachep->buffer_size -
+ sizeof(unsigned long long));
}
static void **dbg_userword(struct kmem_cache *cachep, void *objp)
@@ -561,28 +563,13 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
#define obj_offset(x) 0
#define obj_size(cachep) (cachep->buffer_size)
-#define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long *)NULL;})
-#define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long *)NULL;})
+#define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
+#define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
#define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;})
#endif
/*
- * Maximum size of an obj (in 2^order pages) and absolute limit for the gfp
- * order.
- */
-#if defined(CONFIG_LARGE_ALLOCS)
-#define MAX_OBJ_ORDER 13 /* up to 32Mb */
-#define MAX_GFP_ORDER 13 /* up to 32Mb */
-#elif defined(CONFIG_MMU)
-#define MAX_OBJ_ORDER 5 /* 32 pages */
-#define MAX_GFP_ORDER 5 /* 32 pages */
-#else
-#define MAX_OBJ_ORDER 8 /* up to 1Mb */
-#define MAX_GFP_ORDER 8 /* up to 1Mb */
-#endif
-
-/*
* Do not go above this order unless 0 objects fit into the slab.
*/
#define BREAK_GFP_ORDER_HI 1
@@ -924,12 +911,6 @@ static void next_reap_node(void)
{
int node = __get_cpu_var(reap_node);
- /*
- * Also drain per cpu pages on remote zones
- */
- if (node != numa_node_id())
- drain_node_pages(node);
-
node = next_node(node, node_online_map);
if (unlikely(node >= MAX_NUMNODES))
node = first_node(node_online_map);
@@ -1182,8 +1163,11 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
int memsize = sizeof(struct kmem_list3);
switch (action) {
- case CPU_UP_PREPARE:
+ case CPU_LOCK_ACQUIRE:
mutex_lock(&cache_chain_mutex);
+ break;
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
/*
* We need to do this right in the beginning since
* alloc_arraycache's are going to use this list.
@@ -1270,17 +1254,28 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
}
break;
case CPU_ONLINE:
- mutex_unlock(&cache_chain_mutex);
+ case CPU_ONLINE_FROZEN:
start_cpu_timer(cpu);
break;
#ifdef CONFIG_HOTPLUG_CPU
- case CPU_DOWN_PREPARE:
- mutex_lock(&cache_chain_mutex);
- break;
- case CPU_DOWN_FAILED:
- mutex_unlock(&cache_chain_mutex);
- break;
+ case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
+ /*
+ * Shutdown cache reaper. Note that the cache_chain_mutex is
+ * held so that if cache_reap() is invoked it cannot do
+ * anything expensive but will only modify reap_work
+ * and reschedule the timer.
+ */
+ cancel_rearming_delayed_work(&per_cpu(reap_work, cpu));
+ /* Now the cache_reaper is guaranteed to be not running. */
+ per_cpu(reap_work, cpu).work.func = NULL;
+ break;
+ case CPU_DOWN_FAILED:
+ case CPU_DOWN_FAILED_FROZEN:
+ start_cpu_timer(cpu);
+ break;
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
/*
* Even if all the cpus of a node are down, we don't free the
* kmem_list3 of any cache. This to avoid a race between
@@ -1292,6 +1287,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
/* fall thru */
#endif
case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
list_for_each_entry(cachep, &cache_chain, next) {
struct array_cache *nc;
struct array_cache *shared;
@@ -1350,6 +1346,8 @@ free_array_cache:
continue;
drain_freelist(cachep, l3, l3->free_objects);
}
+ break;
+ case CPU_LOCK_RELEASE:
mutex_unlock(&cache_chain_mutex);
break;
}
@@ -1776,7 +1774,7 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
char *realobj;
if (cachep->flags & SLAB_RED_ZONE) {
- printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n",
+ printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n",
*dbg_redzone1(cachep, objp),
*dbg_redzone2(cachep, objp));
}
@@ -1896,20 +1894,11 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
slab_error(cachep, "end of a freed object "
"was overwritten");
}
- if (cachep->dtor && !(cachep->flags & SLAB_POISON))
- (cachep->dtor) (objp + obj_offset(cachep), cachep, 0);
}
}
#else
static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
{
- if (cachep->dtor) {
- int i;
- for (i = 0; i < cachep->num; i++) {
- void *objp = index_to_obj(cachep, slabp, i);
- (cachep->dtor) (objp, cachep, 0);
- }
- }
}
#endif
@@ -1998,7 +1987,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
size_t left_over = 0;
int gfporder;
- for (gfporder = 0; gfporder <= MAX_GFP_ORDER; gfporder++) {
+ for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
unsigned int num;
size_t remainder;
@@ -2048,7 +2037,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
return left_over;
}
-static int setup_cpu_cache(struct kmem_cache *cachep)
+static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
{
if (g_cpucache_up == FULL)
return enable_cpucache(cachep);
@@ -2109,7 +2098,7 @@ static int setup_cpu_cache(struct kmem_cache *cachep)
* @align: The required alignment for the objects.
* @flags: SLAB flags
* @ctor: A constructor for the objects.
- * @dtor: A destructor for the objects.
+ * @dtor: A destructor for the objects (not implemented anymore).
*
* Returns a ptr to the cache on success, NULL on failure.
* Cannot be called within a int, but can be interrupted.
@@ -2144,7 +2133,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
* Sanity checks... these are all serious usage bugs.
*/
if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
- (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) {
+ size > KMALLOC_MAX_SIZE || dtor) {
printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__,
name);
BUG();
@@ -2190,7 +2179,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
* above the next power of two: caches with object sizes just above a
* power of two have a significant amount of internal fragmentation.
*/
- if (size < 4096 || fls(size - 1) == fls(size-1 + 3 * BYTES_PER_WORD))
+ if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
+ 2 * sizeof(unsigned long long)))
flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
if (!(flags & SLAB_DESTROY_BY_RCU))
flags |= SLAB_POISON;
@@ -2198,9 +2188,6 @@ kmem_cache_create (const char *name, size_t size, size_t align,
if (flags & SLAB_DESTROY_BY_RCU)
BUG_ON(flags & SLAB_POISON);
#endif
- if (flags & SLAB_DESTROY_BY_RCU)
- BUG_ON(dtor);
-
/*
* Always checks flags, a caller might be expecting debug support which
* isn't available.
@@ -2234,13 +2221,21 @@ kmem_cache_create (const char *name, size_t size, size_t align,
}
/*
- * Redzoning and user store require word alignment. Note this will be
- * overridden by architecture or caller mandated alignment if either
- * is greater than BYTES_PER_WORD.
+ * Redzoning and user store require word alignment or possibly larger.
+ * Note this will be overridden by architecture or caller mandated
+ * alignment if either is greater than BYTES_PER_WORD.
*/
- if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER)
+ if (flags & SLAB_STORE_USER)
ralign = BYTES_PER_WORD;
+ if (flags & SLAB_RED_ZONE) {
+ ralign = REDZONE_ALIGN;
+ /* If redzoning, ensure that the second redzone is suitably
+ * aligned, by adjusting the object size accordingly. */
+ size += REDZONE_ALIGN - 1;
+ size &= ~(REDZONE_ALIGN - 1);
+ }
+
/* 2) arch mandated alignment */
if (ralign < ARCH_SLAB_MINALIGN) {
ralign = ARCH_SLAB_MINALIGN;
@@ -2250,7 +2245,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
ralign = align;
}
/* disable debug if necessary */
- if (ralign > BYTES_PER_WORD)
+ if (ralign > __alignof__(unsigned long long))
flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
/*
* 4) Store it.
@@ -2271,14 +2266,18 @@ kmem_cache_create (const char *name, size_t size, size_t align,
*/
if (flags & SLAB_RED_ZONE) {
/* add space for red zone words */
- cachep->obj_offset += BYTES_PER_WORD;
- size += 2 * BYTES_PER_WORD;
+ cachep->obj_offset += sizeof(unsigned long long);
+ size += 2 * sizeof(unsigned long long);
}
if (flags & SLAB_STORE_USER) {
/* user store requires one word storage behind the end of
- * the real object.
+ * the real object. But if the second red zone needs to be
+ * aligned to 64 bits, we must allow that much space.
*/
- size += BYTES_PER_WORD;
+ if (flags & SLAB_RED_ZONE)
+ size += REDZONE_ALIGN;
+ else
+ size += BYTES_PER_WORD;
}
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
@@ -2355,7 +2354,6 @@ kmem_cache_create (const char *name, size_t size, size_t align,
BUG_ON(!cachep->slabp_cache);
}
cachep->ctor = ctor;
- cachep->dtor = dtor;
cachep->name = name;
if (setup_cpu_cache(cachep)) {
@@ -2610,7 +2608,7 @@ static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
}
static void cache_init_objs(struct kmem_cache *cachep,
- struct slab *slabp, unsigned long ctor_flags)
+ struct slab *slabp)
{
int i;
@@ -2634,7 +2632,7 @@ static void cache_init_objs(struct kmem_cache *cachep,
*/
if (cachep->ctor && !(cachep->flags & SLAB_POISON))
cachep->ctor(objp + obj_offset(cachep), cachep,
- ctor_flags);
+ 0);
if (cachep->flags & SLAB_RED_ZONE) {
if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
@@ -2650,7 +2648,7 @@ static void cache_init_objs(struct kmem_cache *cachep,
cachep->buffer_size / PAGE_SIZE, 0);
#else
if (cachep->ctor)
- cachep->ctor(objp, cachep, ctor_flags);
+ cachep->ctor(objp, cachep, 0);
#endif
slab_bufctl(slabp)[i] = i + 1;
}
@@ -2739,7 +2737,6 @@ static int cache_grow(struct kmem_cache *cachep,
struct slab *slabp;
size_t offset;
gfp_t local_flags;
- unsigned long ctor_flags;
struct kmem_list3 *l3;
/*
@@ -2748,7 +2745,6 @@ static int cache_grow(struct kmem_cache *cachep,
*/
BUG_ON(flags & ~(GFP_DMA | GFP_LEVEL_MASK));
- ctor_flags = SLAB_CTOR_CONSTRUCTOR;
local_flags = (flags & GFP_LEVEL_MASK);
/* Take the l3 list lock to change the colour_next on this node */
check_irq_off();
@@ -2793,7 +2789,7 @@ static int cache_grow(struct kmem_cache *cachep,
slabp->nodeid = nodeid;
slab_map_pages(cachep, slabp, objp);
- cache_init_objs(cachep, slabp, ctor_flags);
+ cache_init_objs(cachep, slabp);
if (local_flags & __GFP_WAIT)
local_irq_disable();
@@ -2820,7 +2816,6 @@ failed:
* Perform extra freeing checks:
* - detect bad pointers.
* - POISON/RED_ZONE checking
- * - destructor calls, for caches with POISON+dtor
*/
static void kfree_debugcheck(const void *objp)
{
@@ -2833,7 +2828,7 @@ static void kfree_debugcheck(const void *objp)
static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
{
- unsigned long redzone1, redzone2;
+ unsigned long long redzone1, redzone2;
redzone1 = *dbg_redzone1(cache, obj);
redzone2 = *dbg_redzone2(cache, obj);
@@ -2849,7 +2844,7 @@ static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
else
slab_error(cache, "memory outside object was overwritten");
- printk(KERN_ERR "%p: redzone 1:0x%lx, redzone 2:0x%lx.\n",
+ printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n",
obj, redzone1, redzone2);
}
@@ -2879,12 +2874,6 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
BUG_ON(objnr >= cachep->num);
BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
- if (cachep->flags & SLAB_POISON && cachep->dtor) {
- /* we want to cache poison the object,
- * call the destruction callback
- */
- cachep->dtor(objp + obj_offset(cachep), cachep, 0);
- }
#ifdef CONFIG_DEBUG_SLAB_LEAK
slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
#endif
@@ -3065,7 +3054,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
slab_error(cachep, "double free, or memory outside"
" object was overwritten");
printk(KERN_ERR
- "%p: redzone 1:0x%lx, redzone 2:0x%lx\n",
+ "%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
objp, *dbg_redzone1(cachep, objp),
*dbg_redzone2(cachep, objp));
}
@@ -3084,7 +3073,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
#endif
objp += obj_offset(cachep);
if (cachep->ctor && cachep->flags & SLAB_POISON)
- cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR);
+ cachep->ctor(objp, cachep, 0);
#if ARCH_SLAB_MINALIGN
if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
@@ -3563,7 +3552,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
check_irq_off();
objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
- if (use_alien_caches && cache_free_alien(cachep, objp))
+ if (cache_free_alien(cachep, objp))
return;
if (likely(ac->avail < ac->limit)) {
@@ -3738,7 +3727,6 @@ EXPORT_SYMBOL(__kmalloc);
/**
* krealloc - reallocate memory. The contents will remain unchanged.
- *
* @p: object to reallocate memory for.
* @new_size: how many bytes of memory are required.
* @flags: the type of memory to allocate.
@@ -4136,7 +4124,6 @@ next:
check_irq_on();
mutex_unlock(&cache_chain_mutex);
next_reap_node();
- refresh_cpu_vm_stats(smp_processor_id());
out:
/* Set up the next iteration */
schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
@@ -4428,16 +4415,12 @@ static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
static void show_symbol(struct seq_file *m, unsigned long address)
{
#ifdef CONFIG_KALLSYMS
- char *modname;
- const char *name;
unsigned long offset, size;
- char namebuf[KSYM_NAME_LEN+1];
-
- name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);
+ char modname[MODULE_NAME_LEN + 1], name[KSYM_NAME_LEN + 1];
- if (name) {
+ if (lookup_symbol_attrs(address, &size, &offset, modname, name) == 0) {
seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
- if (modname)
+ if (modname[0])
seq_printf(m, " [%s]", modname);
return;
}