path: root/drivers/staging/zsmalloc/zsmalloc-main.c
author     Linus Torvalds <torvalds@linux-foundation.org>  2012-07-26 18:14:49 (GMT)
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-07-26 18:14:49 (GMT)
commit     b13bc8dda81c54a66a1c84e66f60b8feba659f28 (patch)
tree       100a26eada424fa5d9b0e5eaaf4e23b8fa036fc8  /drivers/staging/zsmalloc/zsmalloc-main.c
parent     9fc377799bc9bfd8d5cb35d0d1ea2e2458cbdbb3 (diff)
parent     419e9266884fa853179ab726c27a63a9d3ae46e3 (diff)
download   linux-fsl-qoriq-b13bc8dda81c54a66a1c84e66f60b8feba659f28.tar.xz
Merge tag 'staging-3.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
Pull staging tree patches from Greg Kroah-Hartman:
 "Here's the big staging tree merge for the 3.6-rc1 merge window.

  There are some patches in here outside of drivers/staging/, notably
  the iio code (which is still straddling the staging / not staging
  boundary), the pstore code, and the tracing code.  All of these have
  gotten acks from the various subsystem maintainers to be included in
  this tree.  The pstore and tracing patches are related, and are coming
  here as they replace one of the android staging drivers.

  Otherwise, the normal staging mess.  Lots of cleanups and a few new
  drivers (some iio drivers, and the large csr wireless driver
  abomination.)

  Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>"

Fixed up trivial conflicts in drivers/staging/comedi/drivers/s626.h and
drivers/staging/gdm72xx/netlink_k.c

* tag 'staging-3.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging: (1108 commits)
  staging: csr: delete a bunch of unused library functions
  staging: csr: remove csr_utf16.c
  staging: csr: remove csr_pmem.h
  staging: csr: remove CsrPmemAlloc
  staging: csr: remove CsrPmemFree()
  staging: csr: remove CsrMemAllocDma()
  staging: csr: remove CsrMemCalloc()
  staging: csr: remove CsrMemAlloc()
  staging: csr: remove CsrMemFree() and CsrMemFreeDma()
  staging: csr: remove csr_util.h
  staging: csr: remove CsrOffSetOf()
  stating: csr: remove unneeded #includes in csr_util.c
  staging: csr: make CsrUInt16ToHex static
  staging: csr: remove CsrMemCpy()
  staging: csr: remove CsrStrLen()
  staging: csr: remove CsrVsnprintf()
  staging: csr: remove CsrStrDup
  staging: csr: remove CsrStrChr()
  staging: csr: remove CsrStrNCmp
  staging: csr: remove CsrStrCmp
  ...
Diffstat (limited to 'drivers/staging/zsmalloc/zsmalloc-main.c')
-rw-r--r--  drivers/staging/zsmalloc/zsmalloc-main.c  233
1 file changed, 159 insertions, 74 deletions
diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/drivers/staging/zsmalloc/zsmalloc-main.c
index 4496737..8b0bcb6 100644
--- a/drivers/staging/zsmalloc/zsmalloc-main.c
+++ b/drivers/staging/zsmalloc/zsmalloc-main.c
@@ -10,6 +10,54 @@
* Released under the terms of GNU General Public License Version 2.0
*/
+
+/*
+ * This allocator is designed for use with zcache and zram. Thus, the
+ * allocator is supposed to work well under low memory conditions. In
+ * particular, it never attempts higher order page allocation which is
+ * very likely to fail under memory pressure. On the other hand, if we
+ * just use single (0-order) pages, it would suffer from very high
+ * fragmentation -- any object of size PAGE_SIZE/2 or larger would occupy
+ * an entire page. This was one of the major issues with its predecessor
+ * (xvmalloc).
+ *
+ * To overcome these issues, zsmalloc allocates a bunch of 0-order pages
+ * and links them together using various 'struct page' fields. These linked
+ * pages act as a single higher-order page i.e. an object can span 0-order
+ * page boundaries. The code refers to these linked pages as a single entity
+ * called zspage.
+ *
+ * Following is how we use various fields and flags of underlying
+ * struct page(s) to form a zspage.
+ *
+ * Usage of struct page fields:
+ * page->first_page: points to the first component (0-order) page
+ * page->index (union with page->freelist): offset of the first object
+ * starting in this page. For the first page, this is
+ * always 0, so we use this field (aka freelist) to point
+ * to the first free object in zspage.
+ * page->lru: links together all component pages (except the first page)
+ * of a zspage
+ *
+ * For _first_ page only:
+ *
+ * page->private (union with page->first_page): refers to the
+ * component page after the first page
+ * page->freelist: points to the first free object in zspage.
+ * Free objects are linked together using in-place
+ * metadata.
+ * page->objects: maximum number of objects we can store in this
+ * zspage (class->zspage_order * PAGE_SIZE / class->size)
+ * page->lru: links together first pages of various zspages.
+ * Basically forming list of zspages in a fullness group.
+ * page->mapping: class index and fullness group of the zspage
+ *
+ * Usage of struct page flags:
+ * PG_private: identifies the first component page
+ * PG_private2: identifies the last component page
+ *
+ */
+
#ifdef CONFIG_ZSMALLOC_DEBUG
#define DEBUG
#endif
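
The comment block added above describes how PG_private marks the first component page of a zspage and PG_private2 marks the last one. A minimal sketch (not part of this patch) of how those markers might be tested, assuming the standard PagePrivate()/PagePrivate2() helpers from <linux/page-flags.h>:

#include <linux/page-flags.h>

/* Sketch only: per the comment above, PG_private identifies the first
 * component page of a zspage and PG_private2 the last one. */
static inline int is_first_page(struct page *page)
{
        return PagePrivate(page);
}

static inline int is_last_page(struct page *page)
{
        return PagePrivate2(page);
}
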
@@ -247,13 +295,11 @@ static void *obj_location_to_handle(struct page *page, unsigned long obj_idx)
}
/* Decode <page, obj_idx> pair from the given object handle */
-static void obj_handle_to_location(void *handle, struct page **page,
+static void obj_handle_to_location(unsigned long handle, struct page **page,
unsigned long *obj_idx)
{
- unsigned long hval = (unsigned long)handle;
-
- *page = pfn_to_page(hval >> OBJ_INDEX_BITS);
- *obj_idx = hval & OBJ_INDEX_MASK;
+ *page = pfn_to_page(handle >> OBJ_INDEX_BITS);
+ *obj_idx = handle & OBJ_INDEX_MASK;
}
static unsigned long obj_idx_to_offset(struct page *page,
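
obj_handle_to_location() above decodes a handle into a <page, obj_idx> pair by shifting out OBJ_INDEX_BITS, mirroring the encoding done by obj_location_to_handle(). A self-contained sketch of that arithmetic follows; the bit width used here is illustrative only, not the value used by the driver:

#include <assert.h>
#include <stdio.h>

/* Illustrative only: the real OBJ_INDEX_BITS depends on the kernel
 * configuration; 16 is just a stand-in for this example. */
#define OBJ_INDEX_BITS  16
#define OBJ_INDEX_MASK  ((1UL << OBJ_INDEX_BITS) - 1)

int main(void)
{
        unsigned long pfn = 0x12345, obj_idx = 42;

        /* encode: <pfn, obj_idx> -> handle */
        unsigned long handle = (pfn << OBJ_INDEX_BITS) | (obj_idx & OBJ_INDEX_MASK);

        /* decode: handle -> <pfn, obj_idx>, as in obj_handle_to_location() */
        assert((handle >> OBJ_INDEX_BITS) == pfn);
        assert((handle & OBJ_INDEX_MASK) == obj_idx);

        printf("handle=%#lx pfn=%#lx idx=%lu\n", handle,
               handle >> OBJ_INDEX_BITS, handle & OBJ_INDEX_MASK);
        return 0;
}
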
@@ -354,7 +400,7 @@ static void init_zspage(struct page *first_page, struct size_class *class)
static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
{
int i, error;
- struct page *first_page = NULL;
+ struct page *first_page = NULL, *uninitialized_var(prev_page);
/*
* Allocate individual pages and link them together as:
@@ -369,7 +415,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
*/
error = -ENOMEM;
for (i = 0; i < class->pages_per_zspage; i++) {
- struct page *page, *prev_page;
+ struct page *page;
page = alloc_page(flags);
if (!page)
@@ -424,12 +470,51 @@ static struct page *find_get_zspage(struct size_class *class)
return page;
}
+static void zs_copy_map_object(char *buf, struct page *firstpage,
+ int off, int size)
+{
+ struct page *pages[2];
+ int sizes[2];
+ void *addr;
+
+ pages[0] = firstpage;
+ pages[1] = get_next_page(firstpage);
+ BUG_ON(!pages[1]);
+
+ sizes[0] = PAGE_SIZE - off;
+ sizes[1] = size - sizes[0];
+
+ /* copy object to per-cpu buffer */
+ addr = kmap_atomic(pages[0]);
+ memcpy(buf, addr + off, sizes[0]);
+ kunmap_atomic(addr);
+ addr = kmap_atomic(pages[1]);
+ memcpy(buf + sizes[0], addr, sizes[1]);
+ kunmap_atomic(addr);
+}
-/*
- * If this becomes a separate module, register zs_init() with
- * module_init(), zs_exit with module_exit(), and remove zs_initialized
-*/
-static int zs_initialized;
+static void zs_copy_unmap_object(char *buf, struct page *firstpage,
+ int off, int size)
+{
+ struct page *pages[2];
+ int sizes[2];
+ void *addr;
+
+ pages[0] = firstpage;
+ pages[1] = get_next_page(firstpage);
+ BUG_ON(!pages[1]);
+
+ sizes[0] = PAGE_SIZE - off;
+ sizes[1] = size - sizes[0];
+
+ /* copy per-cpu buffer to object */
+ addr = kmap_atomic(pages[0]);
+ memcpy(addr + off, buf, sizes[0]);
+ kunmap_atomic(addr);
+ addr = kmap_atomic(pages[1]);
+ memcpy(addr, buf + sizes[0], sizes[1]);
+ kunmap_atomic(addr);
+}
static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,
void *pcpu)
@@ -440,18 +525,23 @@ static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,
switch (action) {
case CPU_UP_PREPARE:
area = &per_cpu(zs_map_area, cpu);
- if (area->vm)
- break;
- area->vm = alloc_vm_area(2 * PAGE_SIZE, area->vm_ptes);
- if (!area->vm)
- return notifier_from_errno(-ENOMEM);
+ /*
+ * Make sure we don't leak memory if a cpu UP notification
+ * and zs_init() race and both call zs_cpu_up() on the same cpu
+ */
+ if (area->vm_buf)
+ return 0;
+ area->vm_buf = (char *)__get_free_page(GFP_KERNEL);
+ if (!area->vm_buf)
+ return -ENOMEM;
+ return 0;
break;
case CPU_DEAD:
case CPU_UP_CANCELED:
area = &per_cpu(zs_map_area, cpu);
- if (area->vm)
- free_vm_area(area->vm);
- area->vm = NULL;
+ if (area->vm_buf)
+ free_page((unsigned long)area->vm_buf);
+ area->vm_buf = NULL;
break;
}
@@ -489,7 +579,7 @@ fail:
struct zs_pool *zs_create_pool(const char *name, gfp_t flags)
{
- int i, error, ovhd_size;
+ int i, ovhd_size;
struct zs_pool *pool;
if (!name)
@@ -516,28 +606,9 @@ struct zs_pool *zs_create_pool(const char *name, gfp_t flags)
}
- /*
- * If this becomes a separate module, register zs_init with
- * module_init, and remove this block
- */
- if (!zs_initialized) {
- error = zs_init();
- if (error)
- goto cleanup;
- zs_initialized = 1;
- }
-
pool->flags = flags;
pool->name = name;
- error = 0; /* Success */
-
-cleanup:
- if (error) {
- zs_destroy_pool(pool);
- pool = NULL;
- }
-
return pool;
}
EXPORT_SYMBOL_GPL(zs_create_pool);
@@ -568,12 +639,12 @@ EXPORT_SYMBOL_GPL(zs_destroy_pool);
* @size: size of block to allocate
*
* On success, handle to the allocated object is returned,
- * otherwise NULL.
+ * otherwise 0.
* Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail.
*/
-void *zs_malloc(struct zs_pool *pool, size_t size)
+unsigned long zs_malloc(struct zs_pool *pool, size_t size)
{
- void *obj;
+ unsigned long obj;
struct link_free *link;
int class_idx;
struct size_class *class;
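
Since zs_malloc() now returns an unsigned long handle and signals failure with 0 rather than NULL, a caller checks the handle numerically. A minimal sketch, not code from the tree (the helper name is invented, and zsmalloc.h is assumed to declare the API shown in this diff):

#include <linux/errno.h>
#include <linux/types.h>
#include "zsmalloc.h"

/* Sketch only: failure is now signalled by a 0 handle, not NULL. */
static int example_alloc(struct zs_pool *pool, size_t size, unsigned long *out)
{
        unsigned long handle = zs_malloc(pool, size);

        if (!handle)
                return -ENOMEM;
        *out = handle;
        return 0;
}
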
@@ -582,7 +653,7 @@ void *zs_malloc(struct zs_pool *pool, size_t size)
unsigned long m_objidx, m_offset;
if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
- return NULL;
+ return 0;
class_idx = get_size_class_index(size);
class = &pool->size_class[class_idx];
@@ -595,14 +666,14 @@ void *zs_malloc(struct zs_pool *pool, size_t size)
spin_unlock(&class->lock);
first_page = alloc_zspage(class, pool->flags);
if (unlikely(!first_page))
- return NULL;
+ return 0;
set_zspage_mapping(first_page, class->index, ZS_EMPTY);
spin_lock(&class->lock);
class->pages_allocated += class->pages_per_zspage;
}
- obj = first_page->freelist;
+ obj = (unsigned long)first_page->freelist;
obj_handle_to_location(obj, &m_page, &m_objidx);
m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
@@ -621,7 +692,7 @@ void *zs_malloc(struct zs_pool *pool, size_t size)
}
EXPORT_SYMBOL_GPL(zs_malloc);
-void zs_free(struct zs_pool *pool, void *obj)
+void zs_free(struct zs_pool *pool, unsigned long obj)
{
struct link_free *link;
struct page *first_page, *f_page;
@@ -648,7 +719,7 @@ void zs_free(struct zs_pool *pool, void *obj)
+ f_offset);
link->next = first_page->freelist;
kunmap_atomic(link);
- first_page->freelist = obj;
+ first_page->freelist = (void *)obj;
first_page->inuse--;
fullness = fix_fullness_group(pool, first_page);
@@ -670,9 +741,15 @@ EXPORT_SYMBOL_GPL(zs_free);
*
* Before using an object allocated from zs_malloc, it must be mapped using
* this function. When done with the object, it must be unmapped using
- * zs_unmap_object
+ * zs_unmap_object.
+ *
+ * Only one object can be mapped per cpu at a time. There is no protection
+ * against nested mappings.
+ *
+ * This function returns with preemption and page faults disabled.
*/
-void *zs_map_object(struct zs_pool *pool, void *handle)
+void *zs_map_object(struct zs_pool *pool, unsigned long handle,
+ enum zs_mapmode mm)
{
struct page *page;
unsigned long obj_idx, off;
@@ -693,26 +770,20 @@ void *zs_map_object(struct zs_pool *pool, void *handle)
if (off + class->size <= PAGE_SIZE) {
/* this object is contained entirely within a page */
area->vm_addr = kmap_atomic(page);
- } else {
- /* this object spans two pages */
- struct page *nextp;
-
- nextp = get_next_page(page);
- BUG_ON(!nextp);
-
-
- set_pte(area->vm_ptes[0], mk_pte(page, PAGE_KERNEL));
- set_pte(area->vm_ptes[1], mk_pte(nextp, PAGE_KERNEL));
-
- /* We pre-allocated VM area so mapping can never fail */
- area->vm_addr = area->vm->addr;
+ return area->vm_addr + off;
}
- return area->vm_addr + off;
+ /* disable page faults to match kmap_atomic() return conditions */
+ pagefault_disable();
+
+ if (mm != ZS_MM_WO)
+ zs_copy_map_object(area->vm_buf, page, off, class->size);
+ area->vm_addr = NULL;
+ return area->vm_buf;
}
EXPORT_SYMBOL_GPL(zs_map_object);
-void zs_unmap_object(struct zs_pool *pool, void *handle)
+void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
{
struct page *page;
unsigned long obj_idx, off;
@@ -722,6 +793,17 @@ void zs_unmap_object(struct zs_pool *pool, void *handle)
struct size_class *class;
struct mapping_area *area;
+ area = &__get_cpu_var(zs_map_area);
+ /* single-page object fastpath */
+ if (area->vm_addr) {
+ kunmap_atomic(area->vm_addr);
+ goto out;
+ }
+
+ /* no write fastpath */
+ if (area->vm_mm == ZS_MM_RO)
+ goto pfenable;
+
BUG_ON(!handle);
obj_handle_to_location(handle, &page, &obj_idx);
@@ -729,15 +811,12 @@ void zs_unmap_object(struct zs_pool *pool, void *handle)
class = &pool->size_class[class_idx];
off = obj_idx_to_offset(page, obj_idx, class->size);
- area = &__get_cpu_var(zs_map_area);
- if (off + class->size <= PAGE_SIZE) {
- kunmap_atomic(area->vm_addr);
- } else {
- set_pte(area->vm_ptes[0], __pte(0));
- set_pte(area->vm_ptes[1], __pte(0));
- __flush_tlb_one((unsigned long)area->vm_addr);
- __flush_tlb_one((unsigned long)area->vm_addr + PAGE_SIZE);
- }
+ zs_copy_unmap_object(area->vm_buf, page, off, class->size);
+
+pfenable:
+ /* enable page faults to match kunmap_atomic() return conditions */
+ pagefault_enable();
+out:
put_cpu_var(zs_map_area);
}
EXPORT_SYMBOL_GPL(zs_unmap_object);
@@ -753,3 +832,9 @@ u64 zs_get_total_size_bytes(struct zs_pool *pool)
return npages << PAGE_SHIFT;
}
EXPORT_SYMBOL_GPL(zs_get_total_size_bytes);
+
+module_init(zs_init);
+module_exit(zs_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
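
The updated zs_map_object() kernel-doc states that only one object may be mapped per cpu at a time and that the function returns with preemption and page faults disabled; together with the new enum zs_mapmode argument, a caller after this patch might look roughly like the sketch below. The helper names and buffer handling are invented for illustration, and zsmalloc.h is assumed to declare the API shown in this diff:

#include <linux/string.h>
#include <linux/types.h>
#include "zsmalloc.h"

/* Illustrative caller only, not code from the tree. The handle comes
 * from zs_malloc(), which now returns an unsigned long. */
static void example_write(struct zs_pool *pool, unsigned long handle,
                          const void *src, size_t len)
{
        /* ZS_MM_WO: mapping may skip copying the old contents into the
         * per-cpu buffer, since they will be overwritten anyway */
        void *dst = zs_map_object(pool, handle, ZS_MM_WO);

        memcpy(dst, src, len);
        zs_unmap_object(pool, handle);  /* re-enables preemption/page faults */
}

static void example_read(struct zs_pool *pool, unsigned long handle,
                         void *dst, size_t len)
{
        /* ZS_MM_RO: unmapping can skip copying the buffer back */
        void *src = zs_map_object(pool, handle, ZS_MM_RO);

        memcpy(dst, src, len);
        zs_unmap_object(pool, handle);
}
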