21 files changed, 868 insertions, 173 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 9960be0..bb1326d 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -194,4 +194,10 @@ config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
 config NLATTR
 	bool
 
+#
+# Generic 64-bit atomic support is selected if needed
+#
+config GENERIC_ATOMIC64
+       bool
+
 endmenu
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 116a350..12327b2 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -300,7 +300,7 @@ config DEBUG_OBJECTS_ENABLE_DEFAULT
 
 config DEBUG_SLAB
 	bool "Debug slab memory allocations"
-	depends on DEBUG_KERNEL && SLAB
+	depends on DEBUG_KERNEL && SLAB && !KMEMCHECK
 	help
 	  Say Y here to have the kernel do limited verification on memory
 	  allocation as well as poisoning memory on free to catch use of freed
@@ -312,7 +312,7 @@ config DEBUG_SLAB_LEAK
 
 config SLUB_DEBUG_ON
 	bool "SLUB debugging on by default"
-	depends on SLUB && SLUB_DEBUG
+	depends on SLUB && SLUB_DEBUG && !KMEMCHECK
 	default n
 	help
 	  Boot with debugging on by default. SLUB boots by default with
@@ -340,8 +340,6 @@ config DEBUG_KMEMLEAK
 	bool "Kernel memory leak detector"
 	depends on DEBUG_KERNEL && EXPERIMENTAL && (X86 || ARM) && \
 		!MEMORY_HOTPLUG
-	select DEBUG_SLAB if SLAB
-	select SLUB_DEBUG if SLUB
 	select DEBUG_FS if SYSFS
 	select STACKTRACE if STACKTRACE_SUPPORT
 	select KALLSYMS
@@ -355,9 +353,24 @@ config DEBUG_KMEMLEAK
 	  allocations. See Documentation/kmemleak.txt for more
 	  details.
 
+	  Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances
+	  of finding leaks due to the slab objects poisoning.
+
 	  In order to access the kmemleak file, debugfs needs to be
 	  mounted (usually at /sys/kernel/debug).
 
+config DEBUG_KMEMLEAK_EARLY_LOG_SIZE
+	int "Maximum kmemleak early log entries"
+	depends on DEBUG_KMEMLEAK
+	range 200 2000
+	default 400
+	help
+	  Kmemleak must track all the memory allocations to avoid
+	  reporting false positives. Since memory may be allocated or
+	  freed before kmemleak is initialised, an early log buffer is
+	  used to store these actions. If kmemleak reports "early log
+	  buffer exceeded", please increase this value.
+
 config DEBUG_KMEMLEAK_TEST
 	tristate "Simple test for the kernel memory leak detector"
 	depends on DEBUG_KMEMLEAK
@@ -472,7 +485,7 @@ config LOCKDEP
 	bool
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
 	select STACKTRACE
-	select FRAME_POINTER if !X86 && !MIPS && !PPC && !ARM_UNWIND && !S390
+	select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390
 	select KALLSYMS
 	select KALLSYMS_ALL
 
@@ -996,3 +1009,5 @@ config DMA_API_DEBUG
 source "samples/Kconfig"
 
 source "lib/Kconfig.kgdb"
+
+source "lib/Kconfig.kmemcheck"
diff --git a/lib/Kconfig.kmemcheck b/lib/Kconfig.kmemcheck
new file mode 100644
index 0000000..603c81b
--- /dev/null
+++ b/lib/Kconfig.kmemcheck
@@ -0,0 +1,91 @@
+config HAVE_ARCH_KMEMCHECK
+	bool
+
+menuconfig KMEMCHECK
+	bool "kmemcheck: trap use of uninitialized memory"
+	depends on DEBUG_KERNEL
+	depends on !X86_USE_3DNOW
+	depends on SLUB || SLAB
+	depends on !CC_OPTIMIZE_FOR_SIZE
+	depends on !FUNCTION_TRACER
+	select FRAME_POINTER
+	select STACKTRACE
+	default n
+	help
+	  This option enables tracing of dynamically allocated kernel memory
+	  to see if memory is used before it has been given an initial value.
+	  Be aware that this requires half of your memory for bookkeeping and
+	  will insert extra code at *every* read and write to tracked memory
+	  thus slow down the kernel code (but user code is unaffected).
+
+	  The kernel may be started with kmemcheck=0 or kmemcheck=1 to disable
+	  or enable kmemcheck at boot-time. If the kernel is started with
+	  kmemcheck=0, the large memory and CPU overhead is not incurred.
+
+choice
+	prompt "kmemcheck: default mode at boot"
+	depends on KMEMCHECK
+	default KMEMCHECK_ONESHOT_BY_DEFAULT
+	help
+	  This option controls the default behaviour of kmemcheck when the
+	  kernel boots and no kmemcheck= parameter is given.
+
+config KMEMCHECK_DISABLED_BY_DEFAULT
+	bool "disabled"
+	depends on KMEMCHECK
+
+config KMEMCHECK_ENABLED_BY_DEFAULT
+	bool "enabled"
+	depends on KMEMCHECK
+
+config KMEMCHECK_ONESHOT_BY_DEFAULT
+	bool "one-shot"
+	depends on KMEMCHECK
+	help
+	  In one-shot mode, only the first error detected is reported before
+	  kmemcheck is disabled.
+
+endchoice
+
+config KMEMCHECK_QUEUE_SIZE
+	int "kmemcheck: error queue size"
+	depends on KMEMCHECK
+	default 64
+	help
+	  Select the maximum number of errors to store in the queue. Since
+	  errors can occur virtually anywhere and in any context, we need a
+	  temporary storage area which is guarantueed not to generate any
+	  other faults. The queue will be emptied as soon as a tasklet may
+	  be scheduled. If the queue is full, new error reports will be
+	  lost.
+
+config KMEMCHECK_SHADOW_COPY_SHIFT
+	int "kmemcheck: shadow copy size (5 => 32 bytes, 6 => 64 bytes)"
+	depends on KMEMCHECK
+	range 2 8
+	default 5
+	help
+	  Select the number of shadow bytes to save along with each entry of
+	  the queue. These bytes indicate what parts of an allocation are
+	  initialized, uninitialized, etc. and will be displayed when an
+	  error is detected to help the debugging of a particular problem.
+
+config KMEMCHECK_PARTIAL_OK
+	bool "kmemcheck: allow partially uninitialized memory"
+	depends on KMEMCHECK
+	default y
+	help
+	  This option works around certain GCC optimizations that produce
+	  32-bit reads from 16-bit variables where the upper 16 bits are
+	  thrown away afterwards. This may of course also hide some real
+	  bugs.
+
+config KMEMCHECK_BITOPS_OK
+	bool "kmemcheck: allow bit-field manipulation"
+	depends on KMEMCHECK
+	default n
+	help
+	  This option silences warnings that would be generated for bit-field
+	  accesses where not all the bits are initialized at the same time.
+	  This may also hide some real bugs.
+
diff --git a/lib/Makefile b/lib/Makefile
index 34c5c0e..2e78277 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -12,7 +12,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 idr.o int_sqrt.o extable.o prio_tree.o \
 	 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o prio_heap.o ratelimit.o show_mem.o \
-	 is_single_threaded.o plist.o decompress.o
+	 is_single_threaded.o plist.o decompress.o flex_array.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
@@ -21,7 +21,7 @@ lib-y	+= kobject.o kref.o klist.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
-	 string_helpers.o
+	 string_helpers.o gcd.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
@@ -95,6 +95,8 @@ obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o
 
 obj-$(CONFIG_GENERIC_CSUM) += checksum.o
 
+obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/atomic64.c b/lib/atomic64.c
new file mode 100644
index 0000000..8bee16e
--- /dev/null
+++ b/lib/atomic64.c
@@ -0,0 +1,186 @@
+/*
+ * Generic implementation of 64-bit atomics using spinlocks,
+ * useful on processors that don't have 64-bit atomic instructions.
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <asm/atomic.h>
+
+/*
+ * We use a hashed array of spinlocks to provide exclusive access
+ * to each atomic64_t variable.  Since this is expected to used on
+ * systems with small numbers of CPUs (<= 4 or so), we use a
+ * relatively small array of 16 spinlocks to avoid wasting too much
+ * memory on the spinlock array.
+ */
+#define NR_LOCKS	16
+
+/*
+ * Ensure each lock is in a separate cacheline.
+ */
+static union {
+	spinlock_t lock;
+	char pad[L1_CACHE_BYTES];
+} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp;
+
+static inline spinlock_t *lock_addr(const atomic64_t *v)
+{
+	unsigned long addr = (unsigned long) v;
+
+	addr >>= L1_CACHE_SHIFT;
+	addr ^= (addr >> 8) ^ (addr >> 16);
+	return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
+}
+
+long long atomic64_read(const atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+EXPORT_SYMBOL(atomic64_read);
+
+void atomic64_set(atomic64_t *v, long long i)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+
+	spin_lock_irqsave(lock, flags);
+	v->counter = i;
+	spin_unlock_irqrestore(lock, flags);
+}
+EXPORT_SYMBOL(atomic64_set);
+
+void atomic64_add(long long a, atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+
+	spin_lock_irqsave(lock, flags);
+	v->counter += a;
+	spin_unlock_irqrestore(lock, flags);
+}
+EXPORT_SYMBOL(atomic64_add);
+
+long long atomic64_add_return(long long a, atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter += a;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+EXPORT_SYMBOL(atomic64_add_return);
+
+void atomic64_sub(long long a, atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+
+	spin_lock_irqsave(lock, flags);
+	v->counter -= a;
+	spin_unlock_irqrestore(lock, flags);
+}
+EXPORT_SYMBOL(atomic64_sub);
+
+long long atomic64_sub_return(long long a, atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter -= a;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+EXPORT_SYMBOL(atomic64_sub_return);
+
+long long atomic64_dec_if_positive(atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter - 1;
+	if (val >= 0)
+		v->counter = val;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+EXPORT_SYMBOL(atomic64_dec_if_positive);
+
+long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter;
+	if (val == o)
+		v->counter = n;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+EXPORT_SYMBOL(atomic64_cmpxchg);
+
+long long atomic64_xchg(atomic64_t *v, long long new)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter;
+	v->counter = new;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+EXPORT_SYMBOL(atomic64_xchg);
+
+int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	int ret = 1;
+
+	spin_lock_irqsave(lock, flags);
+	if (v->counter != u) {
+		v->counter += a;
+		ret = 0;
+	}
+	spin_unlock_irqrestore(lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(atomic64_add_unless);
+
+static int init_atomic64_lock(void)
+{
+	int i;
+
+	for (i = 0; i < NR_LOCKS; ++i)
+		spin_lock_init(&atomic64_lock[i].lock);
+	return 0;
+}
+
+pure_initcall(init_atomic64_lock);
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 35a1f7f..7025658 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -179,14 +179,16 @@ void __bitmap_shift_left(unsigned long *dst,
 }
 EXPORT_SYMBOL(__bitmap_shift_left);
 
-void __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
 				const unsigned long *bitmap2, int bits)
 {
 	int k;
 	int nr = BITS_TO_LONGS(bits);
+	unsigned long result = 0;
 
 	for (k = 0; k < nr; k++)
-		dst[k] = bitmap1[k] & bitmap2[k];
+		result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+	return result != 0;
 }
 EXPORT_SYMBOL(__bitmap_and);
 
@@ -212,14 +214,16 @@ void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
 }
 EXPORT_SYMBOL(__bitmap_xor);
 
-void __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
 				const unsigned long *bitmap2, int bits)
 {
 	int k;
 	int nr = BITS_TO_LONGS(bits);
+	unsigned long result = 0;
 
 	for (k = 0; k < nr; k++)
-		dst[k] = bitmap1[k] & ~bitmap2[k];
+		result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
+	return result != 0;
 }
 EXPORT_SYMBOL(__bitmap_andnot);
 
diff --git a/lib/checksum.c b/lib/checksum.c
index 12e5a1c..b2e2fd4 100644
--- a/lib/checksum.c
+++ b/lib/checksum.c
@@ -55,7 +55,11 @@ static unsigned int do_csum(const unsigned char *buff, int len)
 		goto out;
 	odd = 1 & (unsigned long) buff;
 	if (odd) {
+#ifdef __LITTLE_ENDIAN
 		result = *buff;
+#else
+		result += (*buff << 8);
+#endif
 		len--;
 		buff++;
 	}
@@ -71,7 +75,7 @@ static unsigned int do_csum(const unsigned char *buff, int len)
 		if (count) {
 			unsigned long carry = 0;
 			do {
-				unsigned long w = *(unsigned long *) buff;
+				unsigned long w = *(unsigned int *) buff;
 				count--;
 				buff += 4;
 				result += carry;
@@ -87,7 +91,11 @@ static unsigned int do_csum(const unsigned char *buff, int len)
 		}
 	}
 	if (len & 1)
+#ifdef __LITTLE_ENDIAN
+		result += *buff;
+#else
 		result += (*buff << 8);
+#endif
 	result = from32to16(result);
 	if (odd)
 		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c
index a65c314..e73822a 100644
--- a/lib/dec_and_lock.c
+++ b/lib/dec_and_lock.c
@@ -19,11 +19,10 @@
  */
 int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
 {
-#ifdef CONFIG_SMP
 	/* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
 	if (atomic_add_unless(atomic, -1, 1))
 		return 0;
-#endif
+
 	/* Otherwise do it the slow way */
 	spin_lock(lock);
 	if (atomic_dec_and_test(atomic))
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
index 708e2a8..600f473 100644
--- a/lib/decompress_bunzip2.c
+++ b/lib/decompress_bunzip2.c
@@ -45,12 +45,14 @@
 */
 
 
-#ifndef STATIC
+#ifdef STATIC
+#define PREBOOT
+#else
 #include <linux/decompress/bunzip2.h>
-#endif /* !STATIC */
+#include <linux/slab.h>
+#endif /* STATIC */
 
 #include <linux/decompress/mm.h>
-#include <linux/slab.h>
 
 #ifndef INT_MAX
 #define INT_MAX 0x7fffffff
@@ -681,9 +683,7 @@ STATIC int INIT bunzip2(unsigned char *buf, int len,
 	set_error_fn(error_fn);
 	if (flush)
 		outbuf = malloc(BZIP2_IOBUF_SIZE);
-	else
-		len -= 4; /* Uncompressed size hack active in pre-boot
-			     environment */
+
 	if (!outbuf) {
 		error("Could not allocate output bufer");
 		return -1;
@@ -733,4 +733,14 @@ exit_0:
 	return i;
 }
 
-#define decompress bunzip2
+#ifdef PREBOOT
+STATIC int INIT decompress(unsigned char *buf, int len,
+			int(*fill)(void*, unsigned int),
+			int(*flush)(void*, unsigned int),
+			unsigned char *outbuf,
+			int *pos,
+			void(*error_fn)(char *x))
+{
+	return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error_fn);
+}
+#endif
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c
index e36b296..68dfce5 100644
--- a/lib/decompress_inflate.c
+++ b/lib/decompress_inflate.c
@@ -19,13 +19,13 @@
 #include "zlib_inflate/inflate.h"
 
 #include "zlib_inflate/infutil.h"
+#include <linux/slab.h>
 
 #endif /* STATIC */
 
 #include <linux/decompress/mm.h>
-#include <linux/slab.h>
 
-#define INBUF_LEN (16*1024)
+#define GZIP_IOBUF_SIZE (16*1024)
 
 /* Included from initramfs et al code */
 STATIC int INIT gunzip(unsigned char *buf, int len,
@@ -55,7 +55,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
 	if (buf)
 		zbuf = buf;
 	else {
-		zbuf = malloc(INBUF_LEN);
+		zbuf = malloc(GZIP_IOBUF_SIZE);
 		len = 0;
 	}
 	if (!zbuf) {
@@ -77,7 +77,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
 	}
 
 	if (len == 0)
-		len = fill(zbuf, INBUF_LEN);
+		len = fill(zbuf, GZIP_IOBUF_SIZE);
 
 	/* verify the gzip header */
 	if (len < 10 ||
@@ -113,7 +113,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
 	while (rc == Z_OK) {
 		if (strm->avail_in == 0) {
 			/* TODO: handle case where both pos and fill are set */
-			len = fill(zbuf, INBUF_LEN);
+			len = fill(zbuf, GZIP_IOBUF_SIZE);
 			if (len < 0) {
 				rc = -1;
 				error("read error");
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c
index 32123a1..0b954e0 100644
--- a/lib/decompress_unlzma.c
+++ b/lib/decompress_unlzma.c
@@ -29,12 +29,14 @@
  *Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-#ifndef STATIC
+#ifdef STATIC
+#define PREBOOT
+#else
 #include <linux/decompress/unlzma.h>
+#include <linux/slab.h>
 #endif /* STATIC */
 
 #include <linux/decompress/mm.h>
-#include <linux/slab.h>
 
 #define	MIN(a, b) (((a) < (b)) ? (a) : (b))
 
@@ -543,9 +545,7 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
 	int ret = -1;
 
 	set_error_fn(error_fn);
-	if (!flush)
-		in_len -= 4; /* Uncompressed size hack active in pre-boot
-				environment */
+
 	if (buf)
 		inbuf = buf;
 	else
@@ -645,4 +645,15 @@ exit_0:
 	return ret;
 }
 
-#define decompress unlzma
+#ifdef PREBOOT
+STATIC int INIT decompress(unsigned char *buf, int in_len,
+			      int(*fill)(void*, unsigned int),
+			      int(*flush)(void*, unsigned int),
+			      unsigned char *output,
+			      int *posp,
+			      void(*error_fn)(char *x)
+	)
+{
+	return unlzma(buf, in_len - 4, fill, flush, output, posp, error_fn);
+}
+#endif
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index ad65fc0..65b0d99 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -262,11 +262,12 @@ static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket,
 		 */
 		matches += 1;
 		match_lvl = 0;
-		entry->size      == ref->size      ? ++match_lvl : match_lvl;
-		entry->type      == ref->type      ? ++match_lvl : match_lvl;
-		entry->direction == ref->direction ? ++match_lvl : match_lvl;
+		entry->size         == ref->size         ? ++match_lvl : 0;
+		entry->type         == ref->type         ? ++match_lvl : 0;
+		entry->direction    == ref->direction    ? ++match_lvl : 0;
+		entry->sg_call_ents == ref->sg_call_ents ? ++match_lvl : 0;
 
-		if (match_lvl == 3) {
+		if (match_lvl == 4) {
 			/* perfect-fit - return the result */
 			return entry;
 		} else if (match_lvl > last_lvl) {
@@ -715,7 +716,7 @@ void dma_debug_init(u32 num_entries)
 
 	for (i = 0; i < HASH_SIZE; ++i) {
 		INIT_LIST_HEAD(&dma_entry_hash[i].list);
-		dma_entry_hash[i].lock = SPIN_LOCK_UNLOCKED;
+		spin_lock_init(&dma_entry_hash[i].lock);
 	}
 
 	if (dma_debug_fs_init() != 0) {
@@ -855,90 +856,85 @@ static void check_for_stack(struct device *dev, void *addr)
 				"stack [addr=%p]\n", addr);
 }
 
-static inline bool overlap(void *addr, u64 size, void *start, void *end)
+static inline bool overlap(void *addr, unsigned long len, void *start, void *end)
 {
-	void *addr2 = (char *)addr + size;
+	unsigned long a1 = (unsigned long)addr;
+	unsigned long b1 = a1 + len;
+	unsigned long a2 = (unsigned long)start;
+	unsigned long b2 = (unsigned long)end;
 
-	return ((addr >= start && addr < end) ||
-		(addr2 >= start && addr2 < end) ||
-		((addr < start) && (addr2 >= end)));
+	return !(b1 <= a2 || a1 >= b2);
 }
 
-static void check_for_illegal_area(struct device *dev, void *addr, u64 size)
+static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len)
 {
-	if (overlap(addr, size, _text, _etext) ||
-	    overlap(addr, size, __start_rodata, __end_rodata))
-		err_printk(dev, NULL, "DMA-API: device driver maps "
-				"memory from kernel text or rodata "
-				"[addr=%p] [size=%llu]\n", addr, size);
+	if (overlap(addr, len, _text, _etext) ||
+	    overlap(addr, len, __start_rodata, __end_rodata))
+		err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len);
 }
 
-static void check_sync(struct device *dev, dma_addr_t addr,
-		       u64 size, u64 offset, int direction, bool to_cpu)
+static void check_sync(struct device *dev,
+		       struct dma_debug_entry *ref,
+		       bool to_cpu)
 {
-	struct dma_debug_entry ref = {
-		.dev            = dev,
-		.dev_addr       = addr,
-		.size           = size,
-		.direction      = direction,
-	};
 	struct dma_debug_entry *entry;
 	struct hash_bucket *bucket;
 	unsigned long flags;
 
-	bucket = get_hash_bucket(&ref, &flags);
+	bucket = get_hash_bucket(ref, &flags);
 
-	entry = hash_bucket_find(bucket, &ref);
+	entry = hash_bucket_find(bucket, ref);
 
 	if (!entry) {
 		err_printk(dev, NULL, "DMA-API: device driver tries "
 				"to sync DMA memory it has not allocated "
 				"[device address=0x%016llx] [size=%llu bytes]\n",
-				(unsigned long long)addr, size);
+				(unsigned long long)ref->dev_addr, ref->size);
 		goto out;
 	}
 
-	if ((offset + size) > entry->size) {
+	if (ref->size > entry->size) {
 		err_printk(dev, entry, "DMA-API: device driver syncs"
 				" DMA memory outside allocated range "
 				"[device address=0x%016llx] "
-				"[allocation size=%llu bytes] [sync offset=%llu] "
-				"[sync size=%llu]\n", entry->dev_addr, entry->size,
-				offset, size);
+				"[allocation size=%llu bytes] "
+				"[sync offset+size=%llu]\n",
+				entry->dev_addr, entry->size,
+				ref->size);
 	}
 
-	if (direction != entry->direction) {
+	if (ref->direction != entry->direction) {
 		err_printk(dev, entry, "DMA-API: device driver syncs "
 				"DMA memory with different direction "
 				"[device address=0x%016llx] [size=%llu bytes] "
 				"[mapped with %s] [synced with %s]\n",
-				(unsigned long long)addr, entry->size,
+				(unsigned long long)ref->dev_addr, entry->size,
 				dir2name[entry->direction],
-				dir2name[direction]);
+				dir2name[ref->direction]);
 	}
 
 	if (entry->direction == DMA_BIDIRECTIONAL)
 		goto out;
 
 	if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) &&
-		      !(direction == DMA_TO_DEVICE))
+		      !(ref->direction == DMA_TO_DEVICE))
 		err_printk(dev, entry, "DMA-API: device driver syncs "
 				"device read-only DMA memory for cpu "
 				"[device address=0x%016llx] [size=%llu bytes] "
 				"[mapped with %s] [synced with %s]\n",
-				(unsigned long long)addr, entry->size,
+				(unsigned long long)ref->dev_addr, entry->size,
 				dir2name[entry->direction],
-				dir2name[direction]);
+				dir2name[ref->direction]);
 
 	if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) &&
-		       !(direction == DMA_FROM_DEVICE))
+		       !(ref->direction == DMA_FROM_DEVICE))
 		err_printk(dev, entry, "DMA-API: device driver syncs "
 				"device write-only DMA memory to device "
 				"[device address=0x%016llx] [size=%llu bytes] "
 				"[mapped with %s] [synced with %s]\n",
-				(unsigned long long)addr, entry->size,
+				(unsigned long long)ref->dev_addr, entry->size,
 				dir2name[entry->direction],
-				dir2name[direction]);
+				dir2name[ref->direction]);
 
 out:
 	put_hash_bucket(bucket, &flags);
@@ -972,7 +968,8 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
 		entry->type = dma_debug_single;
 
 	if (!PageHighMem(page)) {
-		void *addr = ((char *)page_address(page)) + offset;
+		void *addr = page_address(page) + offset;
+
 		check_for_stack(dev, addr);
 		check_for_illegal_area(dev, addr, size);
 	}
@@ -1036,19 +1033,16 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
 }
 EXPORT_SYMBOL(debug_dma_map_sg);
 
-static int get_nr_mapped_entries(struct device *dev, struct scatterlist *s)
+static int get_nr_mapped_entries(struct device *dev,
+				 struct dma_debug_entry *ref)
 {
-	struct dma_debug_entry *entry, ref;
+	struct dma_debug_entry *entry;
 	struct hash_bucket *bucket;
 	unsigned long flags;
 	int mapped_ents;
 
-	ref.dev      = dev;
-	ref.dev_addr = sg_dma_address(s);
-	ref.size     = sg_dma_len(s),
-
-	bucket       = get_hash_bucket(&ref, &flags);
-	entry        = hash_bucket_find(bucket, &ref);
+	bucket       = get_hash_bucket(ref, &flags);
+	entry        = hash_bucket_find(bucket, ref);
 	mapped_ents  = 0;
 
 	if (entry)
@@ -1076,16 +1070,14 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 			.dev_addr       = sg_dma_address(s),
 			.size           = sg_dma_len(s),
 			.direction      = dir,
-			.sg_call_ents   = 0,
+			.sg_call_ents   = nelems,
 		};
 
 		if (mapped_ents && i >= mapped_ents)
 			break;
 
-		if (!i) {
-			ref.sg_call_ents = nelems;
-			mapped_ents = get_nr_mapped_entries(dev, s);
-		}
+		if (!i)
+			mapped_ents = get_nr_mapped_entries(dev, &ref);
 
 		check_unmap(&ref);
 	}
@@ -1140,10 +1132,19 @@ EXPORT_SYMBOL(debug_dma_free_coherent);
 void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
 				   size_t size, int direction)
 {
+	struct dma_debug_entry ref;
+
 	if (unlikely(global_disable))
 		return;
 
-	check_sync(dev, dma_handle, size, 0, direction, true);
+	ref.type         = dma_debug_single;
+	ref.dev          = dev;
+	ref.dev_addr     = dma_handle;
+	ref.size         = size;
+	ref.direction    = direction;
+	ref.sg_call_ents = 0;
+
+	check_sync(dev, &ref, true);
 }
 EXPORT_SYMBOL(debug_dma_sync_single_for_cpu);
 
@@ -1151,10 +1152,19 @@ void debug_dma_sync_single_for_device(struct device *dev,
 				      dma_addr_t dma_handle, size_t size,
 				      int direction)
 {
+	struct dma_debug_entry ref;
+
 	if (unlikely(global_disable))
 		return;
 
-	check_sync(dev, dma_handle, size, 0, direction, false);
+	ref.type         = dma_debug_single;
+	ref.dev          = dev;
+	ref.dev_addr     = dma_handle;
+	ref.size         = size;
+	ref.direction    = direction;
+	ref.sg_call_ents = 0;
+
+	check_sync(dev, &ref, false);
 }
 EXPORT_SYMBOL(debug_dma_sync_single_for_device);
 
@@ -1163,10 +1173,19 @@ void debug_dma_sync_single_range_for_cpu(struct device *dev,
 					 unsigned long offset, size_t size,
 					 int direction)
 {
+	struct dma_debug_entry ref;
+
 	if (unlikely(global_disable))
 		return;
 
-	check_sync(dev, dma_handle, size, offset, direction, true);
+	ref.type         = dma_debug_single;
+	ref.dev          = dev;
+	ref.dev_addr     = dma_handle;
+	ref.size         = offset + size;
+	ref.direction    = direction;
+	ref.sg_call_ents = 0;
+
+	check_sync(dev, &ref, true);
 }
 EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu);
 
@@ -1175,10 +1194,19 @@ void debug_dma_sync_single_range_for_device(struct device *dev,
 					    unsigned long offset,
 					    size_t size, int direction)
 {
+	struct dma_debug_entry ref;
+
 	if (unlikely(global_disable))
 		return;
 
-	check_sync(dev, dma_handle, size, offset, direction, false);
+	ref.type         = dma_debug_single;
+	ref.dev          = dev;
+	ref.dev_addr     = dma_handle;
+	ref.size         = offset + size;
+	ref.direction    = direction;
+	ref.sg_call_ents = 0;
+
+	check_sync(dev, &ref, false);
 }
 EXPORT_SYMBOL(debug_dma_sync_single_range_for_device);
 
@@ -1192,14 +1220,24 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 		return;
 
 	for_each_sg(sg, s, nelems, i) {
+
+		struct dma_debug_entry ref = {
+			.type           = dma_debug_sg,
+			.dev            = dev,
+			.paddr          = sg_phys(s),
+			.dev_addr       = sg_dma_address(s),
+			.size           = sg_dma_len(s),
+			.direction      = direction,
+			.sg_call_ents   = nelems,
+		};
+
 		if (!i)
-			mapped_ents = get_nr_mapped_entries(dev, s);
+			mapped_ents = get_nr_mapped_entries(dev, &ref);
 
 		if (i >= mapped_ents)
 			break;
 
-		check_sync(dev, sg_dma_address(s), sg_dma_len(s), 0,
-			   direction, true);
+		check_sync(dev, &ref, true);
 	}
 }
 EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu);
@@ -1214,14 +1252,23 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 		return;
 
 	for_each_sg(sg, s, nelems, i) {
+
+		struct dma_debug_entry ref = {
+			.type           = dma_debug_sg,
+			.dev            = dev,
+			.paddr          = sg_phys(s),
+			.dev_addr       = sg_dma_address(s),
+			.size           = sg_dma_len(s),
+			.direction      = direction,
+			.sg_call_ents   = nelems,
+		};
 		if (!i)
-			mapped_ents = get_nr_mapped_entries(dev, s);
+			mapped_ents = get_nr_mapped_entries(dev, &ref);
 
 		if (i >= mapped_ents)
 			break;
 
-		check_sync(dev, sg_dma_address(s), sg_dma_len(s), 0,
-			   direction, false);
+		check_sync(dev, &ref, false);
 	}
 }
 EXPORT_SYMBOL(debug_dma_sync_sg_for_device);
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 833139c..e22c148 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -164,7 +164,7 @@ static void ddebug_change(const struct ddebug_query *query,
 
 			if (!newflags)
 				dt->num_enabled--;
-			else if (!dp-flags)
+			else if (!dp->flags)
 				dt->num_enabled++;
 			dp->flags = newflags;
 			if (newflags) {
diff --git a/lib/flex_array.c b/lib/flex_array.c
new file mode 100644
index 0000000..08f1636
--- /dev/null
+++ b/lib/flex_array.c
@@ -0,0 +1,267 @@
+/*
+ * Flexible array managed in PAGE_SIZE parts
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2009
+ *
+ * Author: Dave Hansen <dave@linux.vnet.ibm.com>
+ */
+
+#include <linux/flex_array.h>
+#include <linux/slab.h>
+#include <linux/stddef.h>
+
+struct flex_array_part {
+	char elements[FLEX_ARRAY_PART_SIZE];
+};
+
+static inline int __elements_per_part(int element_size)
+{
+	return FLEX_ARRAY_PART_SIZE / element_size;
+}
+
+static inline int bytes_left_in_base(void)
+{
+	int element_offset = offsetof(struct flex_array, parts);
+	int bytes_left = FLEX_ARRAY_BASE_SIZE - element_offset;
+	return bytes_left;
+}
+
+static inline int nr_base_part_ptrs(void)
+{
+	return bytes_left_in_base() / sizeof(struct flex_array_part *);
+}
+
+/*
+ * If a user requests an allocation which is small
+ * enough, we may simply use the space in the
+ * flex_array->parts[] array to store the user
+ * data.
+ */
+static inline int elements_fit_in_base(struct flex_array *fa)
+{
+	int data_size = fa->element_size * fa->total_nr_elements;
+	if (data_size <= bytes_left_in_base())
+		return 1;
+	return 0;
+}
+
+/**
+ * flex_array_alloc - allocate a new flexible array
+ * @element_size:	the size of individual elements in the array
+ * @total:		total number of elements that this should hold
+ *
+ * Note: all locking must be provided by the caller.
+ *
+ * @total is used to size internal structures.  If the user ever
+ * accesses any array indexes >=@total, it will produce errors.
+ *
+ * The maximum number of elements is defined as: the number of
+ * elements that can be stored in a page times the number of
+ * page pointers that we can fit in the base structure or (using
+ * integer math):
+ *
+ * 	(PAGE_SIZE/element_size) * (PAGE_SIZE-8)/sizeof(void *)
+ *
+ * Here's a table showing example capacities.  Note that the maximum
+ * index that the get/put() functions is just nr_objects-1.   This
+ * basically means that you get 4MB of storage on 32-bit and 2MB on
+ * 64-bit.
+ *
+ *
+ * Element size | Objects | Objects |
+ * PAGE_SIZE=4k |  32-bit |  64-bit |
+ * ---------------------------------|
+ *      1 bytes | 4186112 | 2093056 |
+ *      2 bytes | 2093056 | 1046528 |
+ *      3 bytes | 1395030 |  697515 |
+ *      4 bytes | 1046528 |  523264 |
+ *     32 bytes |  130816 |   65408 |
+ *     33 bytes |  126728 |   63364 |
+ *   2048 bytes |    2044 |    1022 |
+ *   2049 bytes |    1022 |     511 |
+ *       void * | 1046528 |  261632 |
+ *
+ * Since 64-bit pointers are twice the size, we lose half the
+ * capacity in the base structure.  Also note that no effort is made
+ * to efficiently pack objects across page boundaries.
+ */
+struct flex_array *flex_array_alloc(int element_size, int total, gfp_t flags)
+{
+	struct flex_array *ret;
+	int max_size = nr_base_part_ptrs() * __elements_per_part(element_size);
+
+	/* max_size will end up 0 if element_size > PAGE_SIZE */
+	if (total > max_size)
+		return NULL;
+	ret = kzalloc(sizeof(struct flex_array), flags);
+	if (!ret)
+		return NULL;
+	ret->element_size = element_size;
+	ret->total_nr_elements = total;
+	return ret;
+}
+
+static int fa_element_to_part_nr(struct flex_array *fa, int element_nr)
+{
+	return element_nr / __elements_per_part(fa->element_size);
+}
+
+/**
+ * flex_array_free_parts - just free the second-level pages
+ * @src:	address of data to copy into the array
+ * @element_nr:	index of the position in which to insert
+ * 		the new element.
+ *
+ * This is to be used in cases where the base 'struct flex_array'
+ * has been statically allocated and should not be free.
+ */
+void flex_array_free_parts(struct flex_array *fa)
+{
+	int part_nr;
+	int max_part = nr_base_part_ptrs();
+
+	if (elements_fit_in_base(fa))
+		return;
+	for (part_nr = 0; part_nr < max_part; part_nr++)
+		kfree(fa->parts[part_nr]);
+}
+
+void flex_array_free(struct flex_array *fa)
+{
+	flex_array_free_parts(fa);
+	kfree(fa);
+}
+
+static int fa_index_inside_part(struct flex_array *fa, int element_nr)
+{
+	return element_nr % __elements_per_part(fa->element_size);
+}
+
+static int index_inside_part(struct flex_array *fa, int element_nr)
+{
+	int part_offset = fa_index_inside_part(fa, element_nr);
+	return part_offset * fa->element_size;
+}
+
+static struct flex_array_part *
+__fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
+{
+	struct flex_array_part *part = fa->parts[part_nr];
+	if (!part) {
+		/*
+		 * This leaves the part pages uninitialized
+		 * and with potentially random data, just
+		 * as if the user had kmalloc()'d the whole.
+		 * __GFP_ZERO can be used to zero it.
+		 */
+		part = kmalloc(FLEX_ARRAY_PART_SIZE, flags);
+		if (!part)
+			return NULL;
+		fa->parts[part_nr] = part;
+	}
+	return part;
+}
+
+/**
+ * flex_array_put - copy data into the array at @element_nr
+ * @src:	address of data to copy into the array
+ * @element_nr:	index of the position in which to insert
+ * 		the new element.
+ *
+ * Note that this *copies* the contents of @src into
+ * the array.  If you are trying to store an array of
+ * pointers, make sure to pass in &ptr instead of ptr.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_put(struct flex_array *fa, int element_nr, void *src, gfp_t flags)
+{
+	int part_nr = fa_element_to_part_nr(fa, element_nr);
+	struct flex_array_part *part;
+	void *dst;
+
+	if (element_nr >= fa->total_nr_elements)
+		return -ENOSPC;
+	if (elements_fit_in_base(fa))
+		part = (struct flex_array_part *)&fa->parts[0];
+	else
+		part = __fa_get_part(fa, part_nr, flags);
+	if (!part)
+		return -ENOMEM;
+	dst = &part->elements[index_inside_part(fa, element_nr)];
+	memcpy(dst, src, fa->element_size);
+	return 0;
+}
+
+/**
+ * flex_array_prealloc - guarantee that array space exists
+ * @start:	index of first array element for which space is allocated
+ * @end:	index of last (inclusive) element for which space is allocated
+ *
+ * This will guarantee that no future calls to flex_array_put()
+ * will allocate memory.  It can be used if you are expecting to
+ * be holding a lock or in some atomic context while writing
+ * data into the array.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_prealloc(struct flex_array *fa, int start, int end, gfp_t flags)
+{
+	int start_part;
+	int end_part;
+	int part_nr;
+	struct flex_array_part *part;
+
+	if (start >= fa->total_nr_elements || end >= fa->total_nr_elements)
+		return -ENOSPC;
+	if (elements_fit_in_base(fa))
+		return 0;
+	start_part = fa_element_to_part_nr(fa, start);
+	end_part = fa_element_to_part_nr(fa, end);
+	for (part_nr = start_part; part_nr <= end_part; part_nr++) {
+		part = __fa_get_part(fa, part_nr, flags);
+		if (!part)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+/**
+ * flex_array_get - pull data back out of the array
+ * @element_nr:	index of the element to fetch from the array
+ *
+ * Returns a pointer to the data at index @element_nr.  Note
+ * that this is a copy of the data that was passed in.  If you
+ * are using this to store pointers, you'll get back &ptr.
+ *
+ * Locking must be provided by the caller.
+ */
+void *flex_array_get(struct flex_array *fa, int element_nr)
+{
+	int part_nr = fa_element_to_part_nr(fa, element_nr);
+	struct flex_array_part *part;
+
+	if (element_nr >= fa->total_nr_elements)
+		return NULL;
+	if (!fa->parts[part_nr])
+		return NULL;
+	if (elements_fit_in_base(fa))
+		part = (struct flex_array_part *)&fa->parts[0];
+	else
+		part = fa->parts[part_nr];
+	return &part->elements[index_inside_part(fa, element_nr)];
+}
diff --git a/lib/gcd.c b/lib/gcd.c
new file mode 100644
index 0000000..f879033
--- /dev/null
+++ b/lib/gcd.c
@@ -0,0 +1,18 @@
+#include <linux/kernel.h>
+#include <linux/gcd.h>
+#include <linux/module.h>
+
+/* Greatest common divisor */
+unsigned long gcd(unsigned long a, unsigned long b)
+{
+	unsigned long r;
+
+	if (a < b)
+		swap(a, b);
+	while ((r = a % b) != 0) {
+		a = b;
+		b = r;
+	}
+	return b;
+}
+EXPORT_SYMBOL_GPL(gcd);
diff --git a/lib/genalloc.c b/lib/genalloc.c
index f6d276d..eed2bdb 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -85,7 +85,6 @@ void gen_pool_destroy(struct gen_pool *pool)
 	int bit, end_bit;
 
 
-	write_lock(&pool->lock);
 	list_for_each_safe(_chunk, _next_chunk, &pool->chunks) {
 		chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
 		list_del(&chunk->next_chunk);
diff --git a/lib/hexdump.c b/lib/hexdump.c
index f07c0db..39af256 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -65,7 +65,8 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
 
 		for (j = 0; j < ngroups; j++)
 			lx += scnprintf(linebuf + lx, linebuflen - lx,
-				"%16.16llx ", (unsigned long long)*(ptr8 + j));
+				"%s%16.16llx", j ? " " : "",
+				(unsigned long long)*(ptr8 + j));
 		ascii_column = 17 * ngroups + 2;
 		break;
 	}
@@ -76,7 +77,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
 
 		for (j = 0; j < ngroups; j++)
 			lx += scnprintf(linebuf + lx, linebuflen - lx,
-				"%8.8x ", *(ptr4 + j));
+				"%s%8.8x", j ? " " : "", *(ptr4 + j));
 		ascii_column = 9 * ngroups + 2;
 		break;
 	}
@@ -87,19 +88,21 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
 
 		for (j = 0; j < ngroups; j++)
 			lx += scnprintf(linebuf + lx, linebuflen - lx,
-				"%4.4x ", *(ptr2 + j));
+				"%s%4.4x", j ? " " : "", *(ptr2 + j));
 		ascii_column = 5 * ngroups + 2;
 		break;
 	}
 
 	default:
-		for (j = 0; (j < rowsize) && (j < len) && (lx + 4) < linebuflen;
-		     j++) {
+		for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) {
 			ch = ptr[j];
 			linebuf[lx++] = hex_asc_hi(ch);
 			linebuf[lx++] = hex_asc_lo(ch);
 			linebuf[lx++] = ' ';
 		}
+		if (j)
+			lx--;
+
 		ascii_column = 3 * rowsize + 2;
 		break;
 	}
@@ -108,7 +111,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
 
 	while (lx < (linebuflen - 1) && lx < (ascii_column - 1))
 		linebuf[lx++] = ' ';
-	for (j = 0; (j < rowsize) && (j < len) && (lx + 2) < linebuflen; j++)
+	for (j = 0; (j < len) && (lx + 2) < linebuflen; j++)
 		linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j]
 				: '.';
 nil:
diff --git a/lib/kobject.c b/lib/kobject.c
index bacf6fe..b512b74 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -793,11 +793,16 @@ static struct kset *kset_create(const char *name,
 				struct kobject *parent_kobj)
 {
 	struct kset *kset;
+	int retval;
 
 	kset = kzalloc(sizeof(*kset), GFP_KERNEL);
 	if (!kset)
 		return NULL;
-	kobject_set_name(&kset->kobj, name);
+	retval = kobject_set_name(&kset->kobj, name);
+	if (retval) {
+		kfree(kset);
+		return NULL;
+	}
 	kset->uevent_ops = uevent_ops;
 	kset->kobj.parent = parent_kobj;
 
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 4bb42a0..23abbd9 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -351,20 +351,12 @@ int radix_tree_insert(struct radix_tree_root *root,
 }
 EXPORT_SYMBOL(radix_tree_insert);
 
-/**
- *	radix_tree_lookup_slot    -    lookup a slot in a radix tree
- *	@root:		radix tree root
- *	@index:		index key
- *
- *	Returns:  the slot corresponding to the position @index in the
- *	radix tree @root. This is useful for update-if-exists operations.
- *
- *	This function can be called under rcu_read_lock iff the slot is not
- *	modified by radix_tree_replace_slot, otherwise it must be called
- *	exclusive from other writers. Any dereference of the slot must be done
- *	using radix_tree_deref_slot.
+/*
+ * is_slot == 1 : search for the slot.
+ * is_slot == 0 : search for the node.
  */
-void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
+static void *radix_tree_lookup_element(struct radix_tree_root *root,
+				unsigned long index, int is_slot)
 {
 	unsigned int height, shift;
 	struct radix_tree_node *node, **slot;
@@ -376,7 +368,7 @@ void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
 	if (!radix_tree_is_indirect_ptr(node)) {
 		if (index > 0)
 			return NULL;
-		return (void **)&root->rnode;
+		return is_slot ? (void *)&root->rnode : node;
 	}
 	node = radix_tree_indirect_to_ptr(node);
 
@@ -397,7 +389,25 @@ void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
 		height--;
 	} while (height > 0);
 
-	return (void **)slot;
+	return is_slot ? (void *)slot:node;
+}
+
+/**
+ *	radix_tree_lookup_slot    -    lookup a slot in a radix tree
+ *	@root:		radix tree root
+ *	@index:		index key
+ *
+ *	Returns:  the slot corresponding to the position @index in the
+ *	radix tree @root. This is useful for update-if-exists operations.
+ *
+ *	This function can be called under rcu_read_lock iff the slot is not
+ *	modified by radix_tree_replace_slot, otherwise it must be called
+ *	exclusive from other writers. Any dereference of the slot must be done
+ *	using radix_tree_deref_slot.
+ */
+void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
+{
+	return (void **)radix_tree_lookup_element(root, index, 1);
 }
 EXPORT_SYMBOL(radix_tree_lookup_slot);
 
@@ -415,38 +425,7 @@ EXPORT_SYMBOL(radix_tree_lookup_slot);
  */
 void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
 {
-	unsigned int height, shift;
-	struct radix_tree_node *node, **slot;
-
-	node = rcu_dereference(root->rnode);
-	if (node == NULL)
-		return NULL;
-
-	if (!radix_tree_is_indirect_ptr(node)) {
-		if (index > 0)
-			return NULL;
-		return node;
-	}
-	node = radix_tree_indirect_to_ptr(node);
-
-	height = node->height;
-	if (index > radix_tree_maxindex(height))
-		return NULL;
-
-	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
-
-	do {
-		slot = (struct radix_tree_node **)
-			(node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
-		node = rcu_dereference(*slot);
-		if (node == NULL)
-			return NULL;
-
-		shift -= RADIX_TREE_MAP_SHIFT;
-		height--;
-	} while (height > 0);
-
-	return node;
+	return radix_tree_lookup_element(root, index, 0);
 }
 EXPORT_SYMBOL(radix_tree_lookup);
 
@@ -666,6 +645,43 @@ unsigned long radix_tree_next_hole(struct radix_tree_root *root,
 }
 EXPORT_SYMBOL(radix_tree_next_hole);
 
+/**
+ *	radix_tree_prev_hole    -    find the prev hole (not-present entry)
+ *	@root:		tree root
+ *	@index:		index key
+ *	@max_scan:	maximum range to search
+ *
+ *	Search backwards in the range [max(index-max_scan+1, 0), index]
+ *	for the first hole.
+ *
+ *	Returns: the index of the hole if found, otherwise returns an index
+ *	outside of the set specified (in which case 'index - return >= max_scan'
+ *	will be true). In rare cases of wrap-around, LONG_MAX will be returned.
+ *
+ *	radix_tree_next_hole may be called under rcu_read_lock. However, like
+ *	radix_tree_gang_lookup, this will not atomically search a snapshot of
+ *	the tree at a single point in time. For example, if a hole is created
+ *	at index 10, then subsequently a hole is created at index 5,
+ *	radix_tree_prev_hole covering both indexes may return 5 if called under
+ *	rcu_read_lock.
+ */
+unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
+				   unsigned long index, unsigned long max_scan)
+{
+	unsigned long i;
+
+	for (i = 0; i < max_scan; i++) {
+		if (!radix_tree_lookup(root, index))
+			break;
+		index--;
+		if (index == LONG_MAX)
+			break;
+	}
+
+	return index;
+}
+EXPORT_SYMBOL(radix_tree_prev_hole);
+
 static unsigned int
 __lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
 	unsigned int max_items, unsigned long *next_index)
diff --git a/lib/rbtree.c b/lib/rbtree.c
index f653659..e2aa3be 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -231,34 +231,34 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
 		node = node->rb_right;
 		while ((left = node->rb_left) != NULL)
 			node = left;
+
+		if (rb_parent(old)) {
+			if (rb_parent(old)->rb_left == old)
+				rb_parent(old)->rb_left = node;
+			else
+				rb_parent(old)->rb_right = node;
+		} else
+			root->rb_node = node;
+
 		child = node->rb_right;
 		parent = rb_parent(node);
 		color = rb_color(node);
 
-		if (child)
-			rb_set_parent(child, parent);
 		if (parent == old) {
-			parent->rb_right = child;
 			parent = node;
-		} else
+		} else {
+			if (child)
+				rb_set_parent(child, parent);
 			parent->rb_left = child;
 
+			node->rb_right = old->rb_right;
+			rb_set_parent(old->rb_right, node);
+		}
+
 		node->rb_parent_color = old->rb_parent_color;
-		node->rb_right = old->rb_right;
 		node->rb_left = old->rb_left;
-
-		if (rb_parent(old))
-		{
-			if (rb_parent(old)->rb_left == old)
-				rb_parent(old)->rb_left = node;
-			else
-				rb_parent(old)->rb_right = node;
-		} else
-			root->rb_node = node;
-
 		rb_set_parent(old->rb_left, node);
-		if (old->rb_right)
-			rb_set_parent(old->rb_right, node);
+
 		goto color;
 	}
 
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index a295e40..0d475d8 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -314,6 +314,7 @@ void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl,
 	miter->__sg = sgl;
 	miter->__nents = nents;
 	miter->__offset = 0;
+	WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG)));
 	miter->__flags = flags;
 }
 EXPORT_SYMBOL(sg_miter_start);
@@ -394,6 +395,9 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
 	if (miter->addr) {
 		miter->__offset += miter->consumed;
 
+		if (miter->__flags & SG_MITER_TO_SG)
+			flush_kernel_dcache_page(miter->page);
+
 		if (miter->__flags & SG_MITER_ATOMIC) {
 			WARN_ON(!irqs_disabled());
 			kunmap_atomic(miter->addr, KM_BIO_SRC_IRQ);
@@ -426,8 +430,14 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
 	unsigned int offset = 0;
 	struct sg_mapping_iter miter;
 	unsigned long flags;
+	unsigned int sg_flags = SG_MITER_ATOMIC;
+
+	if (to_buffer)
+		sg_flags |= SG_MITER_FROM_SG;
+	else
+		sg_flags |= SG_MITER_TO_SG;
 
-	sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC);
+	sg_miter_start(&miter, sgl, nents, sg_flags);
 
 	local_irq_save(flags);
 
@@ -438,10 +448,8 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
 
 		if (to_buffer)
 			memcpy(buf + offset, miter.addr, len);
-		else {
+		else
 			memcpy(miter.addr, buf + offset, len);
-			flush_kernel_dcache_page(miter.page);
-		}
 
 		offset += len;
 	}