149 files changed, 6247 insertions, 2569 deletions
diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt
index 7671352..b349d57 100644
--- a/Documentation/filesystems/btrfs.txt
+++ b/Documentation/filesystems/btrfs.txt
@@ -1,8 +1,8 @@
 
-	BTRFS
-	=====
+BTRFS
+=====
 
-Btrfs is a new copy on write filesystem for Linux aimed at
+Btrfs is a copy on write filesystem for Linux aimed at
 implementing advanced features while focusing on fault tolerance,
 repair and easy administration. Initially developed by Oracle, Btrfs
 is licensed under the GPL and open for contribution from anyone.
@@ -34,9 +34,175 @@ The main Btrfs features include:
     * Online filesystem defragmentation
 
 
+Mount Options
+=============
 
-	MAILING LIST
-	============
+When mounting a btrfs filesystem, the following option are accepted.
+Unless otherwise specified, all options default to off.
+
+  alloc_start=<bytes>
+	Debugging option to force all block allocations above a certain
+	byte threshold on each block device.  The value is specified in
+	bytes, optionally with a K, M, or G suffix, case insensitive.
+	Default is 1MB.
+
+  autodefrag
+	Detect small random writes into files and queue them up for the
+	defrag process.  Works best for small files; Not well suited for
+	large database workloads.
+
+  check_int
+  check_int_data
+  check_int_print_mask=<value>
+	These debugging options control the behavior of the integrity checking
+	module (the BTRFS_FS_CHECK_INTEGRITY config option required).
+
+	check_int enables the integrity checker module, which examines all
+	block write requests to ensure on-disk consistency, at a large
+	memory and CPU cost.  
+
+	check_int_data includes extent data in the integrity checks, and
+	implies the check_int option.
+
+	check_int_print_mask takes a bitmask of BTRFSIC_PRINT_MASK_* values
+	as defined in fs/btrfs/check-integrity.c, to control the integrity
+	checker module behavior.
+
+	See comments at the top of fs/btrfs/check-integrity.c for more info.
+
+  compress
+  compress=<type>
+  compress-force
+  compress-force=<type>
+	Control BTRFS file data compression.  Type may be specified as "zlib"
+	"lzo" or "no" (for no compression, used for remounting).  If no type
+	is specified, zlib is used.  If compress-force is specified,
+	all files will be compressed, whether or not they compress well.
+	If compression is enabled, nodatacow and nodatasum are disabled.
+
+  degraded
+	Allow mounts to continue with missing devices.  A read-write mount may
+	fail with too many devices missing, for example if a stripe member
+	is completely missing.
+
+  device=<devicepath>
+	Specify a device during mount so that ioctls on the control device
+	can be avoided.  Especialy useful when trying to mount a multi-device
+	setup as root.  May be specified multiple times for multiple devices.
+
+  discard
+	Issue frequent commands to let the block device reclaim space freed by
+	the filesystem.  This is useful for SSD devices, thinly provisioned
+	LUNs and virtual machine images, but may have a significant
+	performance impact.  (The fstrim command is also available to
+	initiate batch trims from userspace).
+
+  enospc_debug
+	Debugging option to be more verbose in some ENOSPC conditions.
+
+  fatal_errors=<action>
+	Action to take when encountering a fatal error: 
+	  "bug" - BUG() on a fatal error.  This is the default.
+	  "panic" - panic() on a fatal error.
+
+  flushoncommit
+	The 'flushoncommit' mount option forces any data dirtied by a write in a
+	prior transaction to commit as part of the current commit.  This makes
+	the committed state a fully consistent view of the file system from the
+	application's perspective (i.e., it includes all completed file system
+	operations).  This was previously the behavior only when a snapshot is
+	created.
+
+  inode_cache
+	Enable free inode number caching.   Defaults to off due to an overflow
+	problem when the free space crcs don't fit inside a single page.
+
+  max_inline=<bytes>
+	Specify the maximum amount of space, in bytes, that can be inlined in
+	a metadata B-tree leaf.  The value is specified in bytes, optionally 
+	with a K, M, or G suffix, case insensitive.  In practice, this value
+	is limited by the root sector size, with some space unavailable due
+	to leaf headers.  For a 4k sectorsize, max inline data is ~3900 bytes.
+
+  metadata_ratio=<value>
+	Specify that 1 metadata chunk should be allocated after every <value>
+	data chunks.  Off by default.
+
+  noacl
+	Disable support for Posix Access Control Lists (ACLs).  See the
+	acl(5) manual page for more information about ACLs.
+
+  nobarrier
+        Disables the use of block layer write barriers.  Write barriers ensure
+	that certain IOs make it through the device cache and are on persistent
+	storage.  If used on a device with a volatile (non-battery-backed)
+	write-back cache, this option will lead to filesystem corruption on a
+	system crash or power loss.
+
+  nodatacow
+	Disable data copy-on-write for newly created files.  Implies nodatasum,
+	and disables all compression.
+
+  nodatasum
+	Disable data checksumming for newly created files.
+
+  notreelog
+	Disable the tree logging used for fsync and O_SYNC writes.
+
+  recovery
+	Enable autorecovery attempts if a bad tree root is found at mount time.
+	Currently this scans a list of several previous tree roots and tries to 
+	use the first readable.
+
+ skip_balance
+	Skip automatic resume of interrupted balance operation after mount.
+	May be resumed with "btrfs balance resume."
+
+  space_cache (*)
+	Enable the on-disk freespace cache.
+  nospace_cache
+	Disable freespace cache loading without clearing the cache.
+  clear_cache
+	Force clearing and rebuilding of the disk space cache if something
+	has gone wrong.
+
+  ssd
+  nossd
+  ssd_spread
+	Options to control ssd allocation schemes.  By default, BTRFS will
+	enable or disable ssd allocation heuristics depending on whether a
+	rotational or nonrotational disk is in use.  The ssd and nossd options
+	can override this autodetection.
+
+	The ssd_spread mount option attempts to allocate into big chunks
+	of unused space, and may perform better on low-end ssds.  ssd_spread
+	implies ssd, enabling all other ssd heuristics as well.
+
+  subvol=<path>
+	Mount subvolume at <path> rather than the root subvolume.  <path> is
+	relative to the top level subvolume.
+
+  subvolid=<ID>
+	Mount subvolume specified by an ID number rather than the root subvolume.
+	This allows mounting of subvolumes which are not in the root of the mounted
+	filesystem.
+	You can use "btrfs subvolume list" to see subvolume ID numbers.
+
+  subvolrootid=<objectid> (deprecated)
+	Mount subvolume specified by <objectid> rather than the root subvolume.
+	This allows mounting of subvolumes which are not in the root of the mounted
+	filesystem.
+	You can use "btrfs subvolume show " to see the object ID for a subvolume.
+	
+  thread_pool=<number>
+	The number of worker threads to allocate.  The default number is equal
+	to the number of CPUs + 2, or 8, whichever is smaller.
+
+  user_subvol_rm_allowed
+	Allow subvolumes to be deleted by a non-root user. Use with caution. 
+
+MAILING LIST
+============
 
 There is a Btrfs mailing list hosted on vger.kernel.org. You can
 find details on how to subscribe here:
@@ -49,8 +215,8 @@ http://dir.gmane.org/gmane.comp.file-systems.btrfs
 
 
 
-	IRC
-	===
+IRC
+===
 
 Discussion of Btrfs also occurs on the #btrfs channel of the Freenode
 IRC network.
diff --git a/Documentation/xtensa/mmu.txt b/Documentation/xtensa/mmu.txt
new file mode 100644
index 0000000..2b1af76
--- /dev/null
+++ b/Documentation/xtensa/mmu.txt
@@ -0,0 +1,46 @@
+MMUv3 initialization sequence.
+
+The code in the initialize_mmu macro sets up MMUv3 memory mapping
+identically to MMUv2 fixed memory mapping. Depending on
+CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX symbol this code is
+located in one of the following address ranges:
+
+    0xF0000000..0xFFFFFFFF (will keep same address in MMU v2 layout;
+    			 typically ROM)
+    0x00000000..0x07FFFFFF (system RAM; this code is actually linked
+    			 at 0xD0000000..0xD7FFFFFF [cached]
+    			 or 0xD8000000..0xDFFFFFFF [uncached];
+    			 in any case, initially runs elsewhere
+    			 than linked, so have to be careful)
+
+The code has the following assumptions:
+  This code fragment is run only on an MMU v3.
+  TLBs are in their reset state.
+  ITLBCFG and DTLBCFG are zero (reset state).
+  RASID is 0x04030201 (reset state).
+  PS.RING is zero (reset state).
+  LITBASE is zero (reset state, PC-relative literals); required to be PIC.
+
+TLB setup proceeds along the following steps.
+
+  Legend:
+    VA = virtual address (two upper nibbles of it);
+    PA = physical address (two upper nibbles of it);
+    pc = physical range that contains this code;
+
+After step 2, we jump to virtual address in 0x40000000..0x5fffffff
+that corresponds to next instruction to execute in this code.
+After step 4, we jump to intended (linked) address of this code.
+
+    Step 0     Step1     Step 2     Step3     Step 4     Step5
+ ============  =====  ============  =====  ============  =====
+   VA      PA     PA    VA      PA     PA    VA      PA     PA
+ ------    --     --  ------    --     --  ------    --     --
+ E0..FF -> E0  -> E0  E0..FF -> E0         F0..FF -> F0  -> F0
+ C0..DF -> C0  -> C0  C0..DF -> C0         E0..EF -> F0  -> F0
+ A0..BF -> A0  -> A0  A0..BF -> A0         D8..DF -> 00  -> 00
+ 80..9F -> 80  -> 80  80..9F -> 80         D0..D7 -> 00  -> 00
+ 60..7F -> 60  -> 60  60..7F -> 60
+ 40..5F -> 40         40..5F -> pc  -> pc  40..5F -> pc
+ 20..3F -> 20  -> 20  20..3F -> 20
+ 00..1F -> 00  -> 00  00..1F -> 00
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index e6f4eca..491ae79 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -16,8 +16,6 @@ config ARC
 	select GENERIC_FIND_FIRST_BIT
 	# for now, we don't need GENERIC_IRQ_PROBE, CONFIG_GENERIC_IRQ_CHIP
 	select GENERIC_IRQ_SHOW
-	select GENERIC_KERNEL_EXECVE
-	select GENERIC_KERNEL_THREAD
 	select GENERIC_PENDING_IRQ if SMP
 	select GENERIC_SMP_IDLE_THREAD
 	select HAVE_ARCH_KGDB
@@ -61,9 +59,6 @@ config GENERIC_CALIBRATE_DELAY
 config GENERIC_HWEIGHT
 	def_bool y
 
-config BINFMT_ELF
-	def_bool y
-
 config STACKTRACE_SUPPORT
 	def_bool y
 	select STACKTRACE
@@ -82,6 +77,7 @@ menu "ARC Architecture Configuration"
 menu "ARC Platform/SoC/Board"
 
 source "arch/arc/plat-arcfpga/Kconfig"
+source "arch/arc/plat-tb10x/Kconfig"
 #New platform adds here
 
 endmenu
@@ -134,9 +130,6 @@ if SMP
 config ARC_HAS_COH_CACHES
 	def_bool n
 
-config ARC_HAS_COH_LLSC
-	def_bool n
-
 config ARC_HAS_COH_RTSC
 	def_bool n
 
@@ -304,6 +297,9 @@ config ARC_FPU_SAVE_RESTORE
 	  based on actual usage of FPU by a task. Thus our implemn does
 	  this for all tasks in system.
 
+config ARC_CANT_LLSC
+	def_bool n
+
 menuconfig ARC_CPU_REL_4_10
 	bool "Enable support for Rel 4.10 features"
 	default n
@@ -314,9 +310,7 @@ menuconfig ARC_CPU_REL_4_10
 config ARC_HAS_LLSC
 	bool "Insn: LLOCK/SCOND (efficient atomic ops)"
 	default y
-	depends on ARC_CPU_770
-	# if SMP, enable LLSC ONLY if ARC implementation has coherent atomics
-	depends on !SMP || ARC_HAS_COH_LLSC
+	depends on ARC_CPU_770 && !ARC_CANT_LLSC
 
 config ARC_HAS_SWAPE
 	bool "Insn: SWAPE (endian-swap)"
@@ -415,13 +409,6 @@ config ARC_DBG_TLB_MISS_COUNT
 	  Counts number of I and D TLB Misses and exports them via Debugfs
 	  The counters can be cleared via Debugfs as well
 
-config CMDLINE
-	string "Kernel command line to built-in"
-	default "print-fatal-signals=1"
-	help
-	  The default command line which will be appended to the optional
-	  u-boot provided command line (see below)
-
 config CMDLINE_UBOOT
 	bool "Support U-boot kernel command line passing"
 	default n
@@ -430,8 +417,8 @@ config CMDLINE_UBOOT
 	  command line from the U-boot environment to the Linux kernel then
 	  switch this option on.
 	  ARC U-boot will setup the cmdline in RAM/flash and set r2 to point
-	  to it. kernel startup code will copy the string into cmdline buffer
-	  and also append CONFIG_CMDLINE.
+	  to it. kernel startup code will append this to DeviceTree
+	  /bootargs provided cmdline args.
 
 config ARC_BUILTIN_DTB_NAME
 	string "Built in DTB"
@@ -441,6 +428,10 @@ config ARC_BUILTIN_DTB_NAME
 
 source "kernel/Kconfig.preempt"
 
+menu "Executable file formats"
+source "fs/Kconfig.binfmt"
+endmenu
+
 endmenu	 # "ARC Architecture Configuration"
 
 source "mm/Kconfig"
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 92379c7..183397f 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -8,6 +8,10 @@
 
 UTS_MACHINE := arc
 
+ifeq ($(CROSS_COMPILE),)
+CROSS_COMPILE := arc-elf32-
+endif
+
 KBUILD_DEFCONFIG := fpga_defconfig
 
 cflags-y	+= -mA7 -fno-common -pipe -fno-builtin -D__linux__
@@ -87,20 +91,23 @@ core-y		+= arch/arc/
 core-y		+= arch/arc/boot/dts/
 
 core-$(CONFIG_ARC_PLAT_FPGA_LEGACY)	+= arch/arc/plat-arcfpga/
+core-$(CONFIG_ARC_PLAT_TB10X)		+= arch/arc/plat-tb10x/
 
 drivers-$(CONFIG_OPROFILE)	+= arch/arc/oprofile/
 
 libs-y		+= arch/arc/lib/ $(LIBGCC)
 
+boot		:= arch/arc/boot
+
 #default target for make without any arguements.
-KBUILD_IMAGE := bootpImage
+KBUILD_IMAGE	:= bootpImage
 
 all:	$(KBUILD_IMAGE)
-boot	:= arch/arc/boot
-
 bootpImage: vmlinux
 
-uImage: vmlinux
+boot_targets += uImage uImage.bin uImage.gz
+
+$(boot_targets): vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
 %.dtb %.dtb.S %.dtb.o: scripts
diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile
index 7d514c2..e597cb34 100644
--- a/arch/arc/boot/Makefile
+++ b/arch/arc/boot/Makefile
@@ -3,7 +3,6 @@ targets := vmlinux.bin vmlinux.bin.gz uImage
 # uImage build relies on mkimage being availble on your host for ARC target
 # You will need to build u-boot for ARC, rename mkimage to arc-elf32-mkimage
 # and make sure it's reacable from your PATH
-MKIMAGE := $(srctree)/scripts/mkuboot.sh
 
 OBJCOPYFLAGS= -O binary -R .note -R .note.gnu.build-id -R .comment -S
 
@@ -12,7 +11,12 @@ LINUX_START_TEXT = $$(readelf -h vmlinux | \
 
 UIMAGE_LOADADDR    = $(CONFIG_LINUX_LINK_BASE)
 UIMAGE_ENTRYADDR   = $(LINUX_START_TEXT)
-UIMAGE_COMPRESSION = gzip
+
+suffix-y := bin
+suffix-$(CONFIG_KERNEL_GZIP)	:= gz
+
+targets += uImage uImage.bin uImage.gz
+extra-y += vmlinux.bin vmlinux.bin.gz
 
 $(obj)/vmlinux.bin: vmlinux FORCE
 	$(call if_changed,objcopy)
@@ -20,7 +24,12 @@ $(obj)/vmlinux.bin: vmlinux FORCE
 $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
 	$(call if_changed,gzip)
 
-$(obj)/uImage: $(obj)/vmlinux.bin.gz FORCE
-	$(call if_changed,uimage)
+$(obj)/uImage.bin: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,uimage,none)
+
+$(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE
+	$(call if_changed,uimage,gzip)
 
-PHONY += FORCE
+$(obj)/uImage: $(obj)/uImage.$(suffix-y)
+	@ln -sf $(notdir $<) $@
+	@echo '  Image $@ is ready'
diff --git a/arch/arc/boot/dts/Makefile b/arch/arc/boot/dts/Makefile
index 5776835..faf240e 100644
--- a/arch/arc/boot/dts/Makefile
+++ b/arch/arc/boot/dts/Makefile
@@ -8,6 +8,8 @@ endif
 obj-y   += $(builtindtb-y).dtb.o
 targets += $(builtindtb-y).dtb
 
+.SECONDARY: $(obj)/$(builtindtb-y).dtb.S
+
 dtbs:  $(addprefix  $(obj)/, $(builtindtb-y).dtb)
 
-clean-files := *.dtb
+clean-files := *.dtb  *.dtb.S
diff --git a/arch/arc/boot/dts/abilis_tb100.dtsi b/arch/arc/boot/dts/abilis_tb100.dtsi
new file mode 100644
index 0000000..941ad11
--- /dev/null
+++ b/arch/arc/boot/dts/abilis_tb100.dtsi
@@ -0,0 +1,340 @@
+/*
+ * Abilis Systems TB100 SOC device tree
+ *
+ * Copyright (C) Abilis Systems 2013
+ *
+ * Author: Christian Ruppert <christian.ruppert@abilis.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+/include/ "abilis_tb10x.dtsi"
+
+/* interrupt specifiers
+ * --------------------
+ * 0: rising, 1: low, 2: high, 3: falling,
+ */
+
+/ {
+	clock-frequency		= <500000000>;	/* 500 MHZ */
+
+	soc100 {
+		bus-frequency	= <166666666>;
+
+		pll0: oscillator {
+			clock-frequency  = <1000000000>;
+		};
+		cpu_clk: clkdiv_cpu {
+			clock-mult = <1>;
+			clock-div = <2>;
+		};
+		ahb_clk: clkdiv_ahb {
+			clock-mult = <1>;
+			clock-div = <6>;
+		};
+
+		iomux: iomux@FF10601c {
+			/* Port 1 */
+			pctl_tsin_s0: pctl-tsin-s0 {   /* Serial TS-in 0 */
+				pingrp = "mis0_pins";
+			};
+			pctl_tsin_s1: pctl-tsin-s1 {   /* Serial TS-in 1 */
+				pingrp = "mis1_pins";
+			};
+			pctl_gpio_a: pctl-gpio-a {     /* GPIO bank A */
+				pingrp = "gpioa_pins";
+			};
+			pctl_tsin_p1: pctl-tsin-p1 {   /* Parallel TS-in 1 */
+				pingrp = "mip1_pins";
+			};
+			/* Port 2 */
+			pctl_tsin_s2: pctl-tsin-s2 {   /* Serial TS-in 2 */
+				pingrp = "mis2_pins";
+			};
+			pctl_tsin_s3: pctl-tsin-s3 {   /* Serial TS-in 3 */
+				pingrp = "mis3_pins";
+			};
+			pctl_gpio_c: pctl-gpio-c {     /* GPIO bank C */
+				pingrp = "gpioc_pins";
+			};
+			pctl_tsin_p3: pctl-tsin-p3 {   /* Parallel TS-in 3 */
+				pingrp = "mip3_pins";
+			};
+			/* Port 3 */
+			pctl_tsin_s4: pctl-tsin-s4 {   /* Serial TS-in 4 */
+				pingrp = "mis4_pins";
+			};
+			pctl_tsin_s5: pctl-tsin-s5 {   /* Serial TS-in 5 */
+				pingrp = "mis5_pins";
+			};
+			pctl_gpio_e: pctl-gpio-e {     /* GPIO bank E */
+				pingrp = "gpioe_pins";
+			};
+			pctl_tsin_p5: pctl-tsin-p5 {   /* Parallel TS-in 5 */
+				pingrp = "mip5_pins";
+			};
+			/* Port 4 */
+			pctl_tsin_s6: pctl-tsin-s6 {   /* Serial TS-in 6 */
+				pingrp = "mis6_pins";
+			};
+			pctl_tsin_s7: pctl-tsin-s7 {   /* Serial TS-in 7 */
+				pingrp = "mis7_pins";
+			};
+			pctl_gpio_g: pctl-gpio-g {     /* GPIO bank G */
+				pingrp = "gpiog_pins";
+			};
+			pctl_tsin_p7: pctl-tsin-p7 {   /* Parallel TS-in 7 */
+				pingrp = "mip7_pins";
+			};
+			/* Port 5 */
+			pctl_gpio_j: pctl-gpio-j {     /* GPIO bank J */
+				pingrp = "gpioj_pins";
+			};
+			pctl_gpio_k: pctl-gpio-k {     /* GPIO bank K */
+				pingrp = "gpiok_pins";
+			};
+			pctl_ciplus: pctl-ciplus {     /* CI+ interface */
+				pingrp = "ciplus_pins";
+			};
+			pctl_mcard: pctl-mcard {       /* M-Card interface */
+				pingrp = "mcard_pins";
+			};
+			/* Port 6 */
+			pctl_tsout_p: pctl-tsout-p {   /* Parallel TS-out */
+				pingrp = "mop_pins";
+			};
+			pctl_tsout_s0: pctl-tsout-s0 { /* Serial TS-out 0 */
+				pingrp = "mos0_pins";
+			};
+			pctl_tsout_s1: pctl-tsout-s1 { /* Serial TS-out 1 */
+				pingrp = "mos1_pins";
+			};
+			pctl_tsout_s2: pctl-tsout-s2 { /* Serial TS-out 2 */
+				pingrp = "mos2_pins";
+			};
+			pctl_tsout_s3: pctl-tsout-s3 { /* Serial TS-out 3 */
+				pingrp = "mos3_pins";
+			};
+			/* Port 7 */
+			pctl_uart0: pctl-uart0 {       /* UART 0 */
+				pingrp = "uart0_pins";
+			};
+			pctl_uart1: pctl-uart1 {       /* UART 1 */
+				pingrp = "uart1_pins";
+			};
+			pctl_gpio_l: pctl-gpio-l {     /* GPIO bank L */
+				pingrp = "gpiol_pins";
+			};
+			pctl_gpio_m: pctl-gpio-m {     /* GPIO bank M */
+				pingrp = "gpiom_pins";
+			};
+			/* Port 8 */
+			pctl_spi3: pctl-spi3 {
+				pingrp = "spi3_pins";
+			};
+			/* Port 9 */
+			pctl_spi1: pctl-spi1 {
+				pingrp = "spi1_pins";
+			};
+			pctl_gpio_n: pctl-gpio-n {
+				pingrp = "gpion_pins";
+			};
+			/* Unmuxed GPIOs */
+			pctl_gpio_b: pctl-gpio-b {
+				pingrp = "gpiob_pins";
+			};
+			pctl_gpio_d: pctl-gpio-d {
+				pingrp = "gpiod_pins";
+			};
+			pctl_gpio_f: pctl-gpio-f {
+				pingrp = "gpiof_pins";
+			};
+			pctl_gpio_h: pctl-gpio-h {
+				pingrp = "gpioh_pins";
+			};
+			pctl_gpio_i: pctl-gpio-i {
+				pingrp = "gpioi_pins";
+			};
+		};
+
+		gpioa: gpio@FF140000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF140000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <0>;
+			gpio-pins = <&pctl_gpio_a>;
+		};
+		gpiob: gpio@FF141000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF141000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <3>;
+			gpio-pins = <&pctl_gpio_b>;
+		};
+		gpioc: gpio@FF142000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF142000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <5>;
+			gpio-pins = <&pctl_gpio_c>;
+		};
+		gpiod: gpio@FF143000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF143000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <8>;
+			gpio-pins = <&pctl_gpio_d>;
+		};
+		gpioe: gpio@FF144000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF144000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <10>;
+			gpio-pins = <&pctl_gpio_e>;
+		};
+		gpiof: gpio@FF145000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF145000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <13>;
+			gpio-pins = <&pctl_gpio_f>;
+		};
+		gpiog: gpio@FF146000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF146000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <15>;
+			gpio-pins = <&pctl_gpio_g>;
+		};
+		gpioh: gpio@FF147000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF147000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <18>;
+			gpio-pins = <&pctl_gpio_h>;
+		};
+		gpioi: gpio@FF148000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF148000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <20>;
+			gpio-pins = <&pctl_gpio_i>;
+		};
+		gpioj: gpio@FF149000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF149000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <32>;
+			gpio-pins = <&pctl_gpio_j>;
+		};
+		gpiok: gpio@FF14a000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF14A000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <64>;
+			gpio-pins = <&pctl_gpio_k>;
+		};
+		gpiol: gpio@FF14b000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF14B000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <86>;
+			gpio-pins = <&pctl_gpio_l>;
+		};
+		gpiom: gpio@FF14c000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF14C000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <90>;
+			gpio-pins = <&pctl_gpio_m>;
+		};
+		gpion: gpio@FF14d000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF14D000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <94>;
+			gpio-pins = <&pctl_gpio_n>;
+		};
+	};
+};
diff --git a/arch/arc/boot/dts/abilis_tb100_dvk.dts b/arch/arc/boot/dts/abilis_tb100_dvk.dts
new file mode 100644
index 0000000..c0fd362
--- /dev/null
+++ b/arch/arc/boot/dts/abilis_tb100_dvk.dts
@@ -0,0 +1,127 @@
+/*
+ * Abilis Systems TB100 Development Kit PCB device tree
+ *
+ * Copyright (C) Abilis Systems 2013
+ *
+ * Author: Christian Ruppert <christian.ruppert@abilis.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+/dts-v1/;
+
+/include/ "abilis_tb100.dtsi"
+
+/ {
+	chosen {
+		bootargs = "earlycon=uart8250,mmio32,0xff100000,9600n8 console=ttyS0,9600n8";
+	};
+
+	aliases { };
+
+	memory {
+		device_type = "memory";
+		reg = <0x80000000 0x08000000>;	/* 128M */
+	};
+
+	soc100 {
+		uart@FF100000 {
+			pinctrl-names = "abilis,simple-default";
+			pinctrl-0 = <&pctl_uart0>;
+		};
+		ethernet@FE100000 {
+			phy-mode = "rgmii";
+		};
+
+		i2c0: i2c@FF120000 {
+			sda-hold-time = <432>;
+		};
+		i2c1: i2c@FF121000 {
+			sda-hold-time = <432>;
+		};
+		i2c2: i2c@FF122000 {
+			sda-hold-time = <432>;
+		};
+		i2c3: i2c@FF123000 {
+			sda-hold-time = <432>;
+		};
+		i2c4: i2c@FF124000 {
+			sda-hold-time = <432>;
+		};
+
+		leds {
+			compatible = "gpio-leds";
+			power {
+				label = "Power";
+				gpios = <&gpioi 0>;
+				linux,default-trigger = "default-on";
+			};
+			heartbeat {
+				label = "Heartbeat";
+				gpios = <&gpioi 1>;
+				linux,default-trigger = "heartbeat";
+			};
+			led2 {
+				label = "LED2";
+				gpios = <&gpioi 2>;
+				default-state = "off";
+			};
+			led3 {
+				label = "LED3";
+				gpios = <&gpioi 3>;
+				default-state = "off";
+			};
+			led4 {
+				label = "LED4";
+				gpios = <&gpioi 4>;
+				default-state = "off";
+			};
+			led5 {
+				label = "LED5";
+				gpios = <&gpioi 5>;
+				default-state = "off";
+			};
+			led6 {
+				label = "LED6";
+				gpios = <&gpioi 6>;
+				default-state = "off";
+			};
+			led7 {
+				label = "LED7";
+				gpios = <&gpioi 7>;
+				default-state = "off";
+			};
+			led8 {
+				label = "LED8";
+				gpios = <&gpioi 8>;
+				default-state = "off";
+			};
+			led9 {
+				label = "LED9";
+				gpios = <&gpioi 9>;
+				default-state = "off";
+			};
+			led10 {
+				label = "LED10";
+				gpios = <&gpioi 10>;
+				default-state = "off";
+			};
+			led11 {
+				label = "LED11";
+				gpios = <&gpioi 11>;
+				default-state = "off";
+			};
+		};
+	};
+};
diff --git a/arch/arc/boot/dts/abilis_tb101.dtsi b/arch/arc/boot/dts/abilis_tb101.dtsi
new file mode 100644
index 0000000..fd25c21
--- /dev/null
+++ b/arch/arc/boot/dts/abilis_tb101.dtsi
@@ -0,0 +1,349 @@
+/*
+ * Abilis Systems TB101 SOC device tree
+ *
+ * Copyright (C) Abilis Systems 2013
+ *
+ * Author: Christian Ruppert <christian.ruppert@abilis.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+/include/ "abilis_tb10x.dtsi"
+
+/* interrupt specifiers
+ * --------------------
+ * 0: rising, 1: low, 2: high, 3: falling,
+ */
+
+/ {
+	clock-frequency		= <500000000>;	/* 500 MHZ */
+
+	soc100 {
+		bus-frequency	= <166666666>;
+
+		pll0: oscillator {
+			clock-frequency  = <1000000000>;
+		};
+		cpu_clk: clkdiv_cpu {
+			clock-mult = <1>;
+			clock-div = <2>;
+		};
+		ahb_clk: clkdiv_ahb {
+			clock-mult = <1>;
+			clock-div = <6>;
+		};
+
+		iomux: iomux@FF10601c {
+			/* Port 1 */
+			pctl_tsin_s0: pctl-tsin-s0 {   /* Serial TS-in 0 */
+				pingrp = "mis0_pins";
+			};
+			pctl_tsin_s1: pctl-tsin-s1 {   /* Serial TS-in 1 */
+				pingrp = "mis1_pins";
+			};
+			pctl_gpio_a: pctl-gpio-a {     /* GPIO bank A */
+				pingrp = "gpioa_pins";
+			};
+			pctl_tsin_p1: pctl-tsin-p1 {   /* Parallel TS-in 1 */
+				pingrp = "mip1_pins";
+			};
+			/* Port 2 */
+			pctl_tsin_s2: pctl-tsin-s2 {   /* Serial TS-in 2 */
+				pingrp = "mis2_pins";
+			};
+			pctl_tsin_s3: pctl-tsin-s3 {   /* Serial TS-in 3 */
+				pingrp = "mis3_pins";
+			};
+			pctl_gpio_c: pctl-gpio-c {     /* GPIO bank C */
+				pingrp = "gpioc_pins";
+			};
+			pctl_tsin_p3: pctl-tsin-p3 {   /* Parallel TS-in 3 */
+				pingrp = "mip3_pins";
+			};
+			/* Port 3 */
+			pctl_tsin_s4: pctl-tsin-s4 {   /* Serial TS-in 4 */
+				pingrp = "mis4_pins";
+			};
+			pctl_tsin_s5: pctl-tsin-s5 {   /* Serial TS-in 5 */
+				pingrp = "mis5_pins";
+			};
+			pctl_gpio_e: pctl-gpio-e {     /* GPIO bank E */
+				pingrp = "gpioe_pins";
+			};
+			pctl_tsin_p5: pctl-tsin-p5 {   /* Parallel TS-in 5 */
+				pingrp = "mip5_pins";
+			};
+			/* Port 4 */
+			pctl_tsin_s6: pctl-tsin-s6 {   /* Serial TS-in 6 */
+				pingrp = "mis6_pins";
+			};
+			pctl_tsin_s7: pctl-tsin-s7 {   /* Serial TS-in 7 */
+				pingrp = "mis7_pins";
+			};
+			pctl_gpio_g: pctl-gpio-g {     /* GPIO bank G */
+				pingrp = "gpiog_pins";
+			};
+			pctl_tsin_p7: pctl-tsin-p7 {   /* Parallel TS-in 7 */
+				pingrp = "mip7_pins";
+			};
+			/* Port 5 */
+			pctl_gpio_j: pctl-gpio-j {     /* GPIO bank J */
+				pingrp = "gpioj_pins";
+			};
+			pctl_gpio_k: pctl-gpio-k {     /* GPIO bank K */
+				pingrp = "gpiok_pins";
+			};
+			pctl_ciplus: pctl-ciplus {     /* CI+ interface */
+				pingrp = "ciplus_pins";
+			};
+			pctl_mcard: pctl-mcard {       /* M-Card interface */
+				pingrp = "mcard_pins";
+			};
+			pctl_stc0: pctl-stc0 {         /* Smart card I/F 0 */
+				pingrp = "stc0_pins";
+			};
+			pctl_stc1: pctl-stc1 {         /* Smart card I/F 1 */
+				pingrp = "stc1_pins";
+			};
+			/* Port 6 */
+			pctl_tsout_p: pctl-tsout-p {   /* Parallel TS-out */
+				pingrp = "mop_pins";
+			};
+			pctl_tsout_s0: pctl-tsout-s0 { /* Serial TS-out 0 */
+				pingrp = "mos0_pins";
+			};
+			pctl_tsout_s1: pctl-tsout-s1 { /* Serial TS-out 1 */
+				pingrp = "mos1_pins";
+			};
+			pctl_tsout_s2: pctl-tsout-s2 { /* Serial TS-out 2 */
+				pingrp = "mos2_pins";
+			};
+			pctl_tsout_s3: pctl-tsout-s3 { /* Serial TS-out 3 */
+				pingrp = "mos3_pins";
+			};
+			/* Port 7 */
+			pctl_uart0: pctl-uart0 {       /* UART 0 */
+				pingrp = "uart0_pins";
+			};
+			pctl_uart1: pctl-uart1 {       /* UART 1 */
+				pingrp = "uart1_pins";
+			};
+			pctl_gpio_l: pctl-gpio-l {     /* GPIO bank L */
+				pingrp = "gpiol_pins";
+			};
+			pctl_gpio_m: pctl-gpio-m {     /* GPIO bank M */
+				pingrp = "gpiom_pins";
+			};
+			/* Port 8 */
+			pctl_spi3: pctl-spi3 {
+				pingrp = "spi3_pins";
+			};
+			pctl_jtag: pctl-jtag {
+				pingrp = "jtag_pins";
+			};
+			/* Port 9 */
+			pctl_spi1: pctl-spi1 {
+				pingrp = "spi1_pins";
+			};
+			pctl_gpio_n: pctl-gpio-n {
+				pingrp = "gpion_pins";
+			};
+			/* Unmuxed GPIOs */
+			pctl_gpio_b: pctl-gpio-b {
+				pingrp = "gpiob_pins";
+			};
+			pctl_gpio_d: pctl-gpio-d {
+				pingrp = "gpiod_pins";
+			};
+			pctl_gpio_f: pctl-gpio-f {
+				pingrp = "gpiof_pins";
+			};
+			pctl_gpio_h: pctl-gpio-h {
+				pingrp = "gpioh_pins";
+			};
+			pctl_gpio_i: pctl-gpio-i {
+				pingrp = "gpioi_pins";
+			};
+		};
+
+		gpioa: gpio@FF140000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF140000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <0>;
+			gpio-pins = <&pctl_gpio_a>;
+		};
+		gpiob: gpio@FF141000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF141000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <3>;
+			gpio-pins = <&pctl_gpio_b>;
+		};
+		gpioc: gpio@FF142000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF142000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <5>;
+			gpio-pins = <&pctl_gpio_c>;
+		};
+		gpiod: gpio@FF143000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF143000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <8>;
+			gpio-pins = <&pctl_gpio_d>;
+		};
+		gpioe: gpio@FF144000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF144000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <10>;
+			gpio-pins = <&pctl_gpio_e>;
+		};
+		gpiof: gpio@FF145000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF145000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <13>;
+			gpio-pins = <&pctl_gpio_f>;
+		};
+		gpiog: gpio@FF146000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF146000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <15>;
+			gpio-pins = <&pctl_gpio_g>;
+		};
+		gpioh: gpio@FF147000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF147000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <18>;
+			gpio-pins = <&pctl_gpio_h>;
+		};
+		gpioi: gpio@FF148000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF148000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <20>;
+			gpio-pins = <&pctl_gpio_i>;
+		};
+		gpioj: gpio@FF149000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF149000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <32>;
+			gpio-pins = <&pctl_gpio_j>;
+		};
+		gpiok: gpio@FF14a000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF14A000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <64>;
+			gpio-pins = <&pctl_gpio_k>;
+		};
+		gpiol: gpio@FF14b000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF14B000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <86>;
+			gpio-pins = <&pctl_gpio_l>;
+		};
+		gpiom: gpio@FF14c000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF14C000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <90>;
+			gpio-pins = <&pctl_gpio_m>;
+		};
+		gpion: gpio@FF14d000 {
+			compatible = "abilis,tb10x-gpio";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <27 1>;
+			reg = <0xFF14D000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <1>;
+			gpio-base  = <94>;
+			gpio-pins = <&pctl_gpio_n>;
+		};
+	};
+};
diff --git a/arch/arc/boot/dts/abilis_tb101_dvk.dts b/arch/arc/boot/dts/abilis_tb101_dvk.dts
new file mode 100644
index 0000000..6f8c381
--- /dev/null
+++ b/arch/arc/boot/dts/abilis_tb101_dvk.dts
@@ -0,0 +1,127 @@
+/*
+ * Abilis Systems TB101 Development Kit PCB device tree
+ *
+ * Copyright (C) Abilis Systems 2013
+ *
+ * Author: Christian Ruppert <christian.ruppert@abilis.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+/dts-v1/;
+
+/include/ "abilis_tb101.dtsi"
+
+/ {
+	chosen {
+		bootargs = "earlycon=uart8250,mmio32,0xff100000,9600n8 console=ttyS0,9600n8";
+	};
+
+	aliases { };
+
+	memory {
+		device_type = "memory";
+		reg = <0x80000000 0x08000000>;	/* 128M */
+	};
+
+	soc100 {
+		uart@FF100000 {
+			pinctrl-names = "abilis,simple-default";
+			pinctrl-0 = <&pctl_uart0>;
+		};
+		ethernet@FE100000 {
+			phy-mode = "rgmii";
+		};
+
+		i2c0: i2c@FF120000 {
+			sda-hold-time = <432>;
+		};
+		i2c1: i2c@FF121000 {
+			sda-hold-time = <432>;
+		};
+		i2c2: i2c@FF122000 {
+			sda-hold-time = <432>;
+		};
+		i2c3: i2c@FF123000 {
+			sda-hold-time = <432>;
+		};
+		i2c4: i2c@FF124000 {
+			sda-hold-time = <432>;
+		};
+
+		leds {
+			compatible = "gpio-leds";
+			power {
+				label = "Power";
+				gpios = <&gpioi 0>;
+				linux,default-trigger = "default-on";
+			};
+			heartbeat {
+				label = "Heartbeat";
+				gpios = <&gpioi 1>;
+				linux,default-trigger = "heartbeat";
+			};
+			led2 {
+				label = "LED2";
+				gpios = <&gpioi 2>;
+				default-state = "off";
+			};
+			led3 {
+				label = "LED3";
+				gpios = <&gpioi 3>;
+				default-state = "off";
+			};
+			led4 {
+				label = "LED4";
+				gpios = <&gpioi 4>;
+				default-state = "off";
+			};
+			led5 {
+				label = "LED5";
+				gpios = <&gpioi 5>;
+				default-state = "off";
+			};
+			led6 {
+				label = "LED6";
+				gpios = <&gpioi 6>;
+				default-state = "off";
+			};
+			led7 {
+				label = "LED7";
+				gpios = <&gpioi 7>;
+				default-state = "off";
+			};
+			led8 {
+				label = "LED8";
+				gpios = <&gpioi 8>;
+				default-state = "off";
+			};
+			led9 {
+				label = "LED9";
+				gpios = <&gpioi 9>;
+				default-state = "off";
+			};
+			led10 {
+				label = "LED10";
+				gpios = <&gpioi 10>;
+				default-state = "off";
+			};
+			led11 {
+				label = "LED11";
+				gpios = <&gpioi 11>;
+				default-state = "off";
+			};
+		};
+	};
+};
diff --git a/arch/arc/boot/dts/abilis_tb10x.dtsi b/arch/arc/boot/dts/abilis_tb10x.dtsi
new file mode 100644
index 0000000..a6139fc
--- /dev/null
+++ b/arch/arc/boot/dts/abilis_tb10x.dtsi
@@ -0,0 +1,247 @@
+/*
+ * Abilis Systems TB10X SOC device tree
+ *
+ * Copyright (C) Abilis Systems 2013
+ *
+ * Author: Christian Ruppert <christian.ruppert@abilis.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+/* interrupt specifiers
+ * --------------------
+ * 0: rising, 1: low, 2: high, 3: falling,
+ */
+
+/ {
+	compatible		= "abilis,arc-tb10x";
+	#address-cells		= <1>;
+	#size-cells		= <1>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "snps,arc770d";
+			reg = <0>;
+		};
+	};
+
+	soc100 {
+		#address-cells	= <1>;
+		#size-cells	= <1>;
+		device_type	= "soc";
+		ranges		= <0xfe000000 0xfe000000 0x02000000
+				0x000F0000 0x000F0000 0x00010000>;
+		compatible	= "abilis,tb10x", "simple-bus";
+
+		pll0: oscillator {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-output-names = "pll0";
+		};
+		cpu_clk: clkdiv_cpu {
+			compatible = "fixed-factor-clock";
+			#clock-cells = <0>;
+			clocks = <&pll0>;
+			clock-output-names = "cpu_clk";
+		};
+		ahb_clk: clkdiv_ahb {
+			compatible = "fixed-factor-clock";
+			#clock-cells = <0>;
+			clocks = <&pll0>;
+			clock-output-names = "ahb_clk";
+		};
+
+		iomux: iomux@FF10601c {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "abilis,tb10x-iomux";
+			reg = <0xFF10601c 0x4>;
+		};
+
+		intc: interrupt-controller {
+			compatible = "snps,arc700-intc";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+		};
+		tb10x_ictl: pic@fe002000 {
+			compatible = "abilis,tb10x_ictl";
+			reg = <0xFE002000 0x20>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			interrupt-parent = <&intc>;
+			interrupts = <5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
+					20 21 22 23 24 25 26 27 28 29 30 31>;
+		};
+
+		uart@FF100000 {
+			compatible = "snps,dw-apb-uart",
+					"abilis,simple-pinctrl";
+			reg = <0xFF100000 0x100>;
+			clock-frequency = <166666666>;
+			interrupts = <25 1>;
+			reg-shift = <2>;
+			reg-io-width = <4>;
+			interrupt-parent = <&tb10x_ictl>;
+		};
+		ethernet@FE100000 {
+			compatible = "snps,dwmac-3.70a","snps,dwmac";
+			reg = <0xFE100000 0x1058>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <6 1>;
+			interrupt-names = "macirq";
+			clocks = <&ahb_clk>;
+			clock-names = "stmmaceth";
+		};
+		dma@FE000000 {
+			compatible = "snps,dma-spear1340";
+			reg = <0xFE000000 0x400>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <14 1>;
+			dma-channels = <6>;
+			dma-requests = <0>;
+			dma-masters = <1>;
+			#dma-cells = <3>;
+			chan_allocation_order = <0>;
+			chan_priority = <1>;
+			block_size = <0x7ff>;
+			data_width = <2 0 0 0>;
+			clocks = <&ahb_clk>;
+			clock-names = "hclk";
+		};
+
+		i2c0: i2c@FF120000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "snps,designware-i2c";
+			reg = <0xFF120000 0x1000>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <12 1>;
+			clocks = <&ahb_clk>;
+		};
+		i2c1: i2c@FF121000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "snps,designware-i2c";
+			reg = <0xFF121000 0x1000>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <12 1>;
+			clocks = <&ahb_clk>;
+		};
+		i2c2: i2c@FF122000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "snps,designware-i2c";
+			reg = <0xFF122000 0x1000>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <12 1>;
+			clocks = <&ahb_clk>;
+		};
+		i2c3: i2c@FF123000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "snps,designware-i2c";
+			reg = <0xFF123000 0x1000>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <12 1>;
+			clocks = <&ahb_clk>;
+		};
+		i2c4: i2c@FF124000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "snps,designware-i2c";
+			reg = <0xFF124000 0x1000>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <12 1>;
+			clocks = <&ahb_clk>;
+		};
+
+		spi0: spi@0xFE010000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "abilis,tb100-spi";
+			num-cs = <1>;
+			reg = <0xFE010000 0x20>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <26 1>;
+			clocks = <&ahb_clk>;
+		};
+		spi1: spi@0xFE011000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "abilis,tb100-spi",
+					"abilis,simple-pinctrl";
+			num-cs = <2>;
+			reg = <0xFE011000 0x20>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <10 1>;
+			clocks = <&ahb_clk>;
+		};
+
+		tb10x_tsm: tb10x-tsm@ff316000 {
+			compatible = "abilis,tb100-tsm";
+			reg = <0xff316000 0x400>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <17 1>;
+			output-clkdiv = <4>;
+			global-packet-delay = <0x21>;
+			port-packet-delay = <0>;
+		};
+		tb10x_stream_proc: tb10x-stream-proc {
+			compatible = "abilis,tb100-streamproc";
+			reg =   <0xfff00000 0x200>,
+				<0x000f0000 0x10000>,
+				<0xfff00200 0x105>,
+				<0xff10600c 0x1>,
+				<0xfe001018 0x1>;
+			reg-names =     "mbox",
+					"sp_iccm",
+					"mbox_irq",
+					"cpuctrl",
+					"a6it_int_force";
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <20 1>, <19 1>;
+			interrupt-names = "cmd_irq", "event_irq";
+		};
+		tb10x_mdsc0: tb10x-mdscr@FF300000 {
+			compatible = "abilis,tb100-mdscr";
+			reg = <0xFF300000 0x7000>;
+			tb100-mdscr-manage-tsin;
+		};
+		tb10x_mscr0: tb10x-mdscr@FF307000 {
+			compatible = "abilis,tb100-mdscr";
+			reg = <0xFF307000 0x7000>;
+		};
+		tb10x_scr0: tb10x-mdscr@ff30e000 {
+			compatible = "abilis,tb100-mdscr";
+			reg = <0xFF30e000 0x4000>;
+			tb100-mdscr-manage-tsin;
+		};
+		tb10x_scr1: tb10x-mdscr@ff312000 {
+			compatible = "abilis,tb100-mdscr";
+			reg = <0xFF312000 0x4000>;
+			tb100-mdscr-manage-tsin;
+		};
+		tb10x_wfb: tb10x-wfb@ff319000 {
+			compatible = "abilis,tb100-wfb";
+			reg = <0xff319000 0x1000>;
+			interrupt-parent = <&tb10x_ictl>;
+			interrupts = <16 1>;
+		};
+	};
+};
diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts
new file mode 100644
index 0000000..ea16d78
--- /dev/null
+++ b/arch/arc/boot/dts/nsimosci.dts
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
+
+/ {
+	compatible = "snps,nsimosci";
+	clock-frequency = <80000000>;	/* 80 MHZ */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	interrupt-parent = <&intc>;
+
+	chosen {
+		bootargs = "console=tty0 consoleblank=0";
+	};
+
+	aliases {
+		serial0 = &uart0;
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x80000000 0x10000000>;	/* 256M */
+	};
+
+	fpga {
+		compatible = "simple-bus";
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		/* child and parent address space 1:1 mapped */
+		ranges;
+
+		intc: interrupt-controller {
+			compatible = "snps,arc700-intc";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+		};
+
+		uart0: serial@c0000000 {
+			compatible = "snps,dw-apb-uart";
+			reg = <0xc0000000 0x2000>;
+			interrupts = <11>;
+			#clock-frequency = <80000000>;
+			clock-frequency = <3686400>;
+			baud = <115200>;
+			reg-shift = <2>;
+			reg-io-width = <4>;
+			status = "okay";
+		};
+
+		pgu0: pgu@c9000000 {
+			compatible = "snps,arcpgufb";
+			reg = <0xc9000000 0x400>;
+		};
+
+		ps2: ps2@c9001000 {
+			compatible = "snps,arc_ps2";
+			reg = <0xc9000400 0x14>;
+			interrupts = <13>;
+			interrupt-names = "arc_ps2_irq";
+		};
+
+		eth0: ethernet@c0003000 {
+			compatible = "snps,oscilan";
+			reg = <0xc0003000 0x44>;
+			interrupts = <7>, <8>;
+			interrupt-names = "rx", "tx";
+		};
+	};
+};
diff --git a/arch/arc/configs/fpga_defconfig b/arch/arc/configs/fpga_defconfig
index b869806..95350be 100644
--- a/arch/arc/configs/fpga_defconfig
+++ b/arch/arc/configs/fpga_defconfig
@@ -9,7 +9,7 @@ CONFIG_NAMESPACES=y
 # CONFIG_UTS_NS is not set
 # CONFIG_PID_NS is not set
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../arc_initramfs"
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 # CONFIG_SLUB_DEBUG is not set
@@ -24,6 +24,7 @@ CONFIG_ARC_PLAT_FPGA_LEGACY=y
 CONFIG_ARC_BOARD_ML509=y
 # CONFIG_ARC_HAS_RTSC is not set
 CONFIG_ARC_BUILTIN_DTB_NAME="angel4"
+CONFIG_PREEMPT=y
 # CONFIG_COMPACTION is not set
 # CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NET=y
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
new file mode 100644
index 0000000..446c96c
--- /dev/null
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -0,0 +1,75 @@
+CONFIG_CROSS_COMPILE="arc-elf32-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs"
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARC_PLAT_FPGA_LEGACY=y
+CONFIG_ARC_BOARD_ML509=y
+# CONFIG_ARC_IDE is not set
+# CONFIG_ARCTANGENT_EMAC is not set
+# CONFIG_ARC_HAS_RTSC is not set
+CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci"
+# CONFIG_COMPACTION is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV is not set
+CONFIG_NETDEVICES=y
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_MOUSE_PS2_ALPS is not set
+# CONFIG_MOUSE_PS2_LOGIPS2PP is not set
+# CONFIG_MOUSE_PS2_SYNAPTICS is not set
+# CONFIG_MOUSE_PS2_TRACKPOINT is not set
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+# CONFIG_SERIO_I8042 is not set
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIO_ARC_PS2=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_ARC=y
+CONFIG_SERIAL_ARC_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_XZ_DEC=y
diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig
new file mode 100644
index 0000000..4fa5cd9
--- /dev/null
+++ b/arch/arc/configs/tb10x_defconfig
@@ -0,0 +1,117 @@
+CONFIG_CROSS_COMPILE="arc-elf32-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_DEFAULT_HOSTNAME="tb10x"
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../tb10x-rootfs.cpio"
+CONFIG_INITRAMFS_ROOT_UID=2100
+CONFIG_INITRAMFS_ROOT_GID=501
+# CONFIG_RD_GZIP is not set
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_AIO is not set
+CONFIG_EMBEDDED=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLOCK is not set
+CONFIG_ARC_PLAT_TB10X=y
+CONFIG_ARC_CACHE_LINE_SHIFT=5
+# CONFIG_ARC_HAS_RTSC is not set
+CONFIG_ARC_STACK_NONEXEC=y
+CONFIG_HZ=250
+CONFIG_ARC_BUILTIN_DTB_NAME="abilis_tb100_dvk"
+CONFIG_PREEMPT_VOLUNTARY=y
+# CONFIG_COMPACTION is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_INET_DIAG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_WIRELESS is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_PROC_DEVICETREE=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_CADENCE is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+CONFIG_STMMAC_ETH=y
+CONFIG_STMMAC_DEBUG_FS=y
+CONFIG_STMMAC_DA=y
+CONFIG_STMMAC_CHAINED=y
+# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_WLAN is not set
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_8250_DW=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+# CONFIG_I2C_COMPAT is not set
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+CONFIG_GPIO_SYSFS=y
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=y
+CONFIG_LEDS_TRIGGER_ONESHOT=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_LEDS_TRIGGER_CPU=y
+CONFIG_LEDS_TRIGGER_GPIO=y
+CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
+CONFIG_LEDS_TRIGGER_TRANSIENT=y
+CONFIG_DMADEVICES=y
+CONFIG_DW_DMAC=y
+CONFIG_NET_DMA=y
+CONFIG_ASYNC_TX_DMA=y
+# CONFIG_IOMMU_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CONFIGFS_FS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_DEBUG_FS=y
+CONFIG_HEADERS_CHECK=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h
index 97ee96f..ee1f6ea 100644
--- a/arch/arc/include/asm/cacheflush.h
+++ b/arch/arc/include/asm/cacheflush.h
@@ -20,12 +20,20 @@
 
 #include <linux/mm.h>
 
+/*
+ * Semantically we need this because icache doesn't snoop dcache/dma.
+ * However ARC Cache flush requires paddr as well as vaddr, latter not available
+ * in the flush_icache_page() API. So we no-op it but do the equivalent work
+ * in update_mmu_cache()
+ */
+#define flush_icache_page(vma, page)
+
 void flush_cache_all(void);
 
 void flush_icache_range(unsigned long start, unsigned long end);
-void flush_icache_page(struct vm_area_struct *vma, struct page *page);
-void flush_icache_range_vaddr(unsigned long paddr, unsigned long u_vaddr,
-				     int len);
+void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len);
+void __inv_icache_page(unsigned long paddr, unsigned long vaddr);
+void __flush_dcache_page(unsigned long paddr);
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 
@@ -58,7 +66,7 @@ void dma_cache_wback(unsigned long start, unsigned long sz);
 do {									\
 	memcpy(dst, src, len);						\
 	if (vma->vm_flags & VM_EXEC)					\
-		flush_icache_range_vaddr((unsigned long)(dst), vaddr, len);\
+		__sync_icache_dcache((unsigned long)(dst), vaddr, len);	\
 } while (0)
 
 #define copy_from_user_page(vma, page, vaddr, dst, src, len)		\
diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h
index 4c588f9..57898a1 100644
--- a/arch/arc/include/asm/irq.h
+++ b/arch/arc/include/asm/irq.h
@@ -9,7 +9,8 @@
 #ifndef __ASM_ARC_IRQ_H
 #define __ASM_ARC_IRQ_H
 
-#define NR_IRQS		32
+#define NR_CPU_IRQS	32  /* number of interrupt lines of ARC770 CPU */
+#define NR_IRQS		128 /* allow some CPU external IRQ handling */
 
 /* Platform Independent IRQs */
 #define TIMER0_IRQ      3
diff --git a/arch/arc/include/asm/serial.h b/arch/arc/include/asm/serial.h
index 4dff5a1..602b097 100644
--- a/arch/arc/include/asm/serial.h
+++ b/arch/arc/include/asm/serial.h
@@ -22,4 +22,14 @@
 
 #define BASE_BAUD	(arc_get_core_freq() / 16)
 
+/*
+ * This is definitely going to break early 8250 consoles on multi-platform
+ * images but hey, it won't add any code complexity for a debug feature of
+ * one broken driver.
+ */
+#ifdef CONFIG_ARC_PLAT_TB10X
+#undef BASE_BAUD
+#define BASE_BAUD	(arc_get_core_freq() / 16 / 3)
+#endif
+
 #endif /* _ASM_ARC_SERIAL_H */
diff --git a/arch/arc/include/asm/tlb.h b/arch/arc/include/asm/tlb.h
index 3eb2ce0..fe91719 100644
--- a/arch/arc/include/asm/tlb.h
+++ b/arch/arc/include/asm/tlb.h
@@ -21,20 +21,28 @@
 
 #ifndef __ASSEMBLY__
 
-#define tlb_flush(tlb) local_flush_tlb_mm((tlb)->mm)
+#define tlb_flush(tlb)				\
+do {						\
+	if (tlb->fullmm)			\
+		flush_tlb_mm((tlb)->mm);	\
+} while (0)
 
 /*
  * This pair is called at time of munmap/exit to flush cache and TLB entries
  * for mappings being torn down.
  * 1) cache-flush part -implemented via tlb_start_vma( ) can be NOP (for now)
  *    as we don't support aliasing configs in our VIPT D$.
- * 2) tlb-flush part - implemted via tlb_end_vma( ) can be NOP as well-
- *    albiet for difft reasons - its better handled by moving to new ASID
+ * 2) tlb-flush part - implemted via tlb_end_vma( ) flushes the TLB range
  *
  * Note, read http://lkml.org/lkml/2004/1/15/6
  */
 #define tlb_start_vma(tlb, vma)
-#define tlb_end_vma(tlb, vma)
+
+#define tlb_end_vma(tlb, vma)						\
+do {									\
+	if (!tlb->fullmm)						\
+		flush_tlb_range(vma, vma->vm_start, vma->vm_end);	\
+} while (0)
 
 #define __tlb_remove_tlb_entry(tlb, ptep, address)
 
diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c
index 0dc148e..7dcda70 100644
--- a/arch/arc/kernel/asm-offsets.c
+++ b/arch/arc/kernel/asm-offsets.c
@@ -11,9 +11,9 @@
 #include <linux/interrupt.h>
 #include <linux/thread_info.h>
 #include <linux/kbuild.h>
+#include <linux/ptrace.h>
 #include <asm/hardirq.h>
 #include <asm/page.h>
-#include <asm/ptrace.h>
 
 int main(void)
 {
diff --git a/arch/arc/kernel/clk.c b/arch/arc/kernel/clk.c
index 66ce0dc..10c7b0b 100644
--- a/arch/arc/kernel/clk.c
+++ b/arch/arc/kernel/clk.c
@@ -8,7 +8,7 @@
 
 #include <asm/clk.h>
 
-unsigned long core_freq = 800000000;
+unsigned long core_freq = 80000000;
 
 /*
  * As of now we default to device-tree provided clock
diff --git a/arch/arc/kernel/disasm.c b/arch/arc/kernel/disasm.c
index d14764a..b8a549c 100644
--- a/arch/arc/kernel/disasm.c
+++ b/arch/arc/kernel/disasm.c
@@ -12,8 +12,8 @@
 #include <linux/types.h>
 #include <linux/kprobes.h>
 #include <linux/slab.h>
+#include <linux/uaccess.h>
 #include <asm/disasm.h>
-#include <asm/uaccess.h>
 
 #if defined(CONFIG_KGDB) || defined(CONFIG_ARC_MISALIGN_ACCESS) || \
 	defined(CONFIG_KPROBES)
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 91eeab8..0c6d664 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -393,12 +393,14 @@ ARC_ENTRY EV_TLBProtV
 #ifdef  CONFIG_ARC_MISALIGN_ACCESS
 	SAVE_CALLEE_SAVED_USER
 	mov r3, sp              ; callee_regs
-#endif
 
 	bl  do_misaligned_access
 
-#ifdef  CONFIG_ARC_MISALIGN_ACCESS
-	DISCARD_CALLEE_SAVED_USER
+	; TBD: optimize - do this only if a callee reg was involved
+	; either a dst of emulated LD/ST or src with address-writeback
+	RESTORE_CALLEE_SAVED_USER
+#else
+	bl  do_misaligned_error
 #endif
 
 	b   ret_from_exception
diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c
index 551c10d..8115fa5 100644
--- a/arch/arc/kernel/irq.c
+++ b/arch/arc/kernel/irq.c
@@ -11,6 +11,8 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/irqdomain.h>
+#include <linux/irqchip.h>
+#include "../../drivers/irqchip/irqchip.h"
 #include <asm/sections.h>
 #include <asm/irq.h>
 #include <asm/mach_desc.h>
@@ -26,7 +28,7 @@
  * -Disable all IRQs (on CPU side)
  * -Optionally, setup the High priority Interrupts as Level 2 IRQs
  */
-void __init arc_init_IRQ(void)
+void __cpuinit arc_init_IRQ(void)
 {
 	int level_mask = 0;
 
@@ -97,15 +99,13 @@ static const struct irq_domain_ops arc_intc_domain_ops = {
 
 static struct irq_domain *root_domain;
 
-void __init init_onchip_IRQ(void)
+static int __init
+init_onchip_IRQ(struct device_node *intc, struct device_node *parent)
 {
-	struct device_node *intc = NULL;
+	if (parent)
+		panic("DeviceTree incore intc not a root irq controller\n");
 
-	intc = of_find_compatible_node(NULL, NULL, "snps,arc700-intc");
-	if(!intc)
-		panic("DeviceTree Missing incore intc\n");
-
-	root_domain = irq_domain_add_legacy(intc, NR_IRQS, 0, 0,
+	root_domain = irq_domain_add_legacy(intc, NR_CPU_IRQS, 0, 0,
 					    &arc_intc_domain_ops, NULL);
 
 	if (!root_domain)
@@ -113,8 +113,12 @@ void __init init_onchip_IRQ(void)
 
 	/* with this we don't need to export root_domain */
 	irq_set_default_host(root_domain);
+
+	return 0;
 }
 
+IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ);
+
 /*
  * Late Interrupt system init called from start_kernel for Boot CPU only
  *
@@ -123,12 +127,13 @@ void __init init_onchip_IRQ(void)
  */
 void __init init_IRQ(void)
 {
-	init_onchip_IRQ();
-
 	/* Any external intc can be setup here */
 	if (machine_desc->init_irq)
 		machine_desc->init_irq();
 
+	/* process the entire interrupt tree in one go */
+	irqchip_init();
+
 #ifdef CONFIG_SMP
 	/* Master CPU can initialize it's side of IPI */
 	if (machine_desc->init_smp)
diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c
index 3bfeacb..5a7b80e 100644
--- a/arch/arc/kernel/kprobes.c
+++ b/arch/arc/kernel/kprobes.c
@@ -10,7 +10,6 @@
 #include <linux/kprobes.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <linux/kprobes.h>
 #include <linux/kdebug.h>
 #include <linux/sched.h>
 #include <linux/uaccess.h>
diff --git a/arch/arc/kernel/module.c b/arch/arc/kernel/module.c
index cdd3593..376e046 100644
--- a/arch/arc/kernel/module.c
+++ b/arch/arc/kernel/module.c
@@ -47,7 +47,7 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 		}
 	}
 #endif
-    return 0;
+	return 0;
 }
 
 void module_arch_cleanup(struct module *mod)
@@ -141,5 +141,5 @@ int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs,
 		mod->arch.unw_info = unw;
 	}
 #endif
-    return 0;
+	return 0;
 }
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 2d95ac0..b2b3731 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -14,14 +14,13 @@
 #include <linux/module.h>
 #include <linux/cpu.h>
 #include <linux/of_fdt.h>
+#include <linux/cache.h>
 #include <asm/sections.h>
 #include <asm/arcregs.h>
 #include <asm/tlb.h>
-#include <asm/cache.h>
 #include <asm/setup.h>
 #include <asm/page.h>
 #include <asm/irq.h>
-#include <asm/arcregs.h>
 #include <asm/prom.h>
 #include <asm/unwind.h>
 #include <asm/clk.h>
@@ -32,14 +31,14 @@
 int running_on_hw = 1;	/* vs. on ISS */
 
 char __initdata command_line[COMMAND_LINE_SIZE];
-struct machine_desc *machine_desc __initdata;
+struct machine_desc *machine_desc __cpuinitdata;
 
 struct task_struct *_current_task[NR_CPUS];	/* For stack switching */
 
 struct cpuinfo_arc cpuinfo_arc700[NR_CPUS];
 
 
-void __init read_arc_build_cfg_regs(void)
+void __cpuinit read_arc_build_cfg_regs(void)
 {
 	struct bcr_perip uncached_space;
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
@@ -238,7 +237,7 @@ char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
 	return buf;
 }
 
-void __init arc_chk_ccms(void)
+void __cpuinit arc_chk_ccms(void)
 {
 #if defined(CONFIG_ARC_HAS_DCCM) || defined(CONFIG_ARC_HAS_ICCM)
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
@@ -273,7 +272,7 @@ void __init arc_chk_ccms(void)
  * hardware has dedicated regs which need to be saved/restored on ctx-sw
  * (Single Precision uses core regs), thus kernel is kind of oblivious to it
  */
-void __init arc_chk_fpu(void)
+void __cpuinit arc_chk_fpu(void)
 {
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
 
@@ -294,7 +293,7 @@ void __init arc_chk_fpu(void)
  *    such as only for boot CPU etc
  */
 
-void __init setup_processor(void)
+void __cpuinit setup_processor(void)
 {
 	char str[512];
 	int cpu_id = smp_processor_id();
@@ -319,23 +318,20 @@ void __init setup_processor(void)
 
 void __init setup_arch(char **cmdline_p)
 {
+	/* This also populates @boot_command_line from /bootargs */
+	machine_desc = setup_machine_fdt(__dtb_start);
+	if (!machine_desc)
+		panic("Embedded DT invalid\n");
+
+	/* Append any u-boot provided cmdline */
 #ifdef CONFIG_CMDLINE_UBOOT
-	/* Make sure that a whitespace is inserted before */
-	strlcat(command_line, " ", sizeof(command_line));
+	/* Add a whitespace seperator between the 2 cmdlines */
+	strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
+	strlcat(boot_command_line, command_line, COMMAND_LINE_SIZE);
 #endif
-	/*
-	 * Append .config cmdline to base command line, which might already
-	 * contain u-boot "bootargs" (handled by head.S, if so configured)
-	 */
-	strlcat(command_line, CONFIG_CMDLINE, sizeof(command_line));
 
 	/* Save unparsed command line copy for /proc/cmdline */
-	strlcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
-	*cmdline_p = command_line;
-
-	machine_desc = setup_machine_fdt(__dtb_start);
-	if (!machine_desc)
-		panic("Embedded DT invalid\n");
+	*cmdline_p = boot_command_line;
 
 	/* To force early parsing of things like mem=xxx */
 	parse_early_param();
diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c
index f13f728..09f4309 100644
--- a/arch/arc/kernel/time.c
+++ b/arch/arc/kernel/time.c
@@ -33,7 +33,6 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
-#include <linux/interrupt.h>
 #include <linux/time.h>
 #include <linux/init.h>
 #include <linux/timex.h>
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
index 7496995..0471d9c 100644
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -16,11 +16,12 @@
 #include <linux/sched.h>
 #include <linux/kdebug.h>
 #include <linux/uaccess.h>
-#include <asm/ptrace.h>
+#include <linux/ptrace.h>
+#include <linux/kprobes.h>
+#include <linux/kgdb.h>
 #include <asm/setup.h>
-#include <asm/kprobes.h>
 #include <asm/unaligned.h>
-#include <asm/kgdb.h>
+#include <asm/kprobes.h>
 
 void __init trap_init(void)
 {
@@ -83,6 +84,7 @@ DO_ERROR_INFO(SIGILL, "Invalid Extn Insn", do_extension_fault, ILL_ILLOPC)
 DO_ERROR_INFO(SIGILL, "Illegal Insn (or Seq)", insterror_is_error, ILL_ILLOPC)
 DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", do_memory_error, BUS_ADRERR)
 DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT)
+DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN)
 
 #ifdef CONFIG_ARC_MISALIGN_ACCESS
 /*
@@ -91,21 +93,11 @@ DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT)
 int do_misaligned_access(unsigned long cause, unsigned long address,
 			 struct pt_regs *regs, struct callee_regs *cregs)
 {
-	if (misaligned_fixup(address, regs, cause, cregs) != 0) {
-		siginfo_t info;
-
-		info.si_signo = SIGBUS;
-		info.si_errno = 0;
-		info.si_code = BUS_ADRALN;
-		info.si_addr = (void __user *)address;
-		return handle_exception(cause, "Misaligned Access", regs,
-					  &info);
-	}
+	if (misaligned_fixup(address, regs, cause, cregs) != 0)
+		return do_misaligned_error(cause, address, regs);
+
 	return 0;
 }
-
-#else
-DO_ERROR_INFO(SIGSEGV, "Misaligned Access", do_misaligned_access, SEGV_ACCERR)
 #endif
 
 /*
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index 0aec0198..11c301b 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -26,7 +26,6 @@ static noinline void print_reg_file(long *reg_rev, int start_num)
 	char buf[512];
 	int n = 0, len = sizeof(buf);
 
-	/* weird loop because pt_regs regs rev r12..r0, r25..r13 */
 	for (i = start_num; i < start_num + 13; i++) {
 		n += scnprintf(buf + n, len - n, "r%02u: 0x%08lx\t",
 			       i, (unsigned long)*reg_rev);
@@ -34,13 +33,18 @@ static noinline void print_reg_file(long *reg_rev, int start_num)
 		if (((i + 1) % 3) == 0)
 			n += scnprintf(buf + n, len - n, "\n");
 
+		/* because pt_regs has regs reversed: r12..r0, r25..r13 */
 		reg_rev--;
 	}
 
 	if (start_num != 0)
 		n += scnprintf(buf + n, len - n, "\n\n");
 
-	pr_info("%s", buf);
+	/* To continue printing callee regs on same line as scratch regs */
+	if (start_num == 0)
+		pr_info("%s", buf);
+	else
+		pr_cont("%s\n", buf);
 }
 
 static void show_callee_regs(struct callee_regs *cregs)
@@ -83,6 +87,10 @@ static void show_faulting_vma(unsigned long address, char *buf)
 	dev_t dev = 0;
 	char *nm = buf;
 
+	/* can't use print_vma_addr() yet as it doesn't check for
+	 * non-inclusive vma
+	 */
+
 	vma = find_vma(current->active_mm, address);
 
 	/* check against the find_vma( ) behaviour which returns the next VMA
@@ -98,10 +106,13 @@ static void show_faulting_vma(unsigned long address, char *buf)
 			ino = inode->i_ino;
 		}
 		pr_info("    @off 0x%lx in [%s]\n"
-			"    VMA: 0x%08lx to 0x%08lx\n\n",
-		       address - vma->vm_start, nm, vma->vm_start, vma->vm_end);
-	} else
+			"    VMA: 0x%08lx to 0x%08lx\n",
+			vma->vm_start < TASK_UNMAPPED_BASE ?
+				address : address - vma->vm_start,
+			nm, vma->vm_start, vma->vm_end);
+	} else {
 		pr_info("    @No matching VMA found\n");
+	}
 }
 
 static void show_ecr_verbose(struct pt_regs *regs)
@@ -110,7 +121,7 @@ static void show_ecr_verbose(struct pt_regs *regs)
 	unsigned long address;
 
 	cause_reg = current->thread.cause_code;
-	pr_info("\n[ECR]: 0x%08x => ", cause_reg);
+	pr_info("\n[ECR   ]: 0x%08x => ", cause_reg);
 
 	/* For Data fault, this is data address not instruction addr */
 	address = current->thread.fault_address;
@@ -120,7 +131,7 @@ static void show_ecr_verbose(struct pt_regs *regs)
 
 	/* For DTLB Miss or ProtV, display the memory involved too */
 	if (vec == ECR_V_DTLB_MISS) {
-		pr_cont("Invalid (%s) @ 0x%08lx by insn @ 0x%08lx\n",
+		pr_cont("Invalid %s 0x%08lx by insn @ 0x%08lx\n",
 		       (cause_code == 0x01) ? "Read From" :
 		       ((cause_code == 0x02) ? "Write to" : "EX"),
 		       address, regs->ret);
@@ -168,20 +179,23 @@ void show_regs(struct pt_regs *regs)
 	if (current->thread.cause_code)
 		show_ecr_verbose(regs);
 
-	pr_info("[EFA]: 0x%08lx\n", current->thread.fault_address);
-	pr_info("[ERET]: 0x%08lx (PC of Faulting Instr)\n", regs->ret);
+	pr_info("[EFA   ]: 0x%08lx\n[BLINK ]: %pS\n[ERET  ]: %pS\n",
+		current->thread.fault_address,
+		(void *)regs->blink, (void *)regs->ret);
 
-	show_faulting_vma(regs->ret, buf);	/* faulting code, not data */
+	if (user_mode(regs))
+		show_faulting_vma(regs->ret, buf); /* faulting code, not data */
 
-	/* can't use print_vma_addr() yet as it doesn't check for
-	 * non-inclusive vma
-	 */
+	pr_info("[STAT32]: 0x%08lx", regs->status32);
+
+#define STS_BIT(r, bit)	r->status32 & STATUS_##bit##_MASK ? #bit : ""
+	if (!user_mode(regs))
+		pr_cont(" : %2s %2s %2s %2s %2s\n",
+			STS_BIT(regs, AE), STS_BIT(regs, A2), STS_BIT(regs, A1),
+			STS_BIT(regs, E2), STS_BIT(regs, E1));
 
-	/* print special regs */
-	pr_info("status32: 0x%08lx\n", regs->status32);
-	pr_info(" SP: 0x%08lx\tFP: 0x%08lx\n", regs->sp, regs->fp);
-	pr_info("BTA: 0x%08lx\tBLINK: 0x%08lx\n",
-		regs->bta, regs->blink);
+	pr_info("BTA: 0x%08lx\t SP: 0x%08lx\t FP: 0x%08lx\n",
+		regs->bta, regs->sp, regs->fp);
 	pr_info("LPS: 0x%08lx\tLPE: 0x%08lx\tLPC: 0x%08lx\n",
 	       regs->lp_start, regs->lp_end, regs->lp_count);
 
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index 88d617d..c854cf9 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -72,16 +72,6 @@
 #include <asm/cachectl.h>
 #include <asm/setup.h>
 
-
-#ifdef CONFIG_ARC_HAS_ICACHE
-static void __ic_line_inv_no_alias(unsigned long, int);
-static void __ic_line_inv_2_alias(unsigned long, int);
-static void __ic_line_inv_4_alias(unsigned long, int);
-
-/* Holds the ptr to flush routine, dependign on size due to aliasing issues */
-static void (*___flush_icache_rtn) (unsigned long, int);
-#endif
-
 char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len)
 {
 	int n = 0;
@@ -109,7 +99,7 @@ char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len)
  * the cpuinfo structure for later use.
  * No Validation done here, simply read/convert the BCRs
  */
-void __init read_decode_cache_bcr(void)
+void __cpuinit read_decode_cache_bcr(void)
 {
 	struct bcr_cache ibcr, dbcr;
 	struct cpuinfo_arc_cache *p_ic, *p_dc;
@@ -141,7 +131,7 @@ void __init read_decode_cache_bcr(void)
  * 3. Enable the Caches, setup default flush mode for D-Cache
  * 3. Calculate the SHMLBA used by user space
  */
-void __init arc_cache_init(void)
+void __cpuinit arc_cache_init(void)
 {
 	unsigned int temp;
 	unsigned int cpu = smp_processor_id();
@@ -171,30 +161,6 @@ void __init arc_cache_init(void)
 
 	}
 #endif
-
-	/*
-	 * if Cache way size is <= page size then no aliasing exhibited
-	 * otherwise ratio determines num of aliases.
-	 * e.g. 32K I$, 2 way set assoc, 8k pg size
-	 *       way-sz = 32k/2 = 16k
-	 *       way-pg-ratio = 16k/8k = 2, so 2 aliases possible
-	 *       (meaning 1 line could be in 2 possible locations).
-	 */
-	way_pg_ratio = ic->sz / ARC_ICACHE_WAYS / PAGE_SIZE;
-	switch (way_pg_ratio) {
-	case 0:
-	case 1:
-		___flush_icache_rtn = __ic_line_inv_no_alias;
-		break;
-	case 2:
-		___flush_icache_rtn = __ic_line_inv_2_alias;
-		break;
-	case 4:
-		___flush_icache_rtn = __ic_line_inv_4_alias;
-		break;
-	default:
-		panic("Unsupported I-Cache Sz\n");
-	}
 #endif
 
 	/* Enable/disable I-Cache */
@@ -391,75 +357,38 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz,
 /*
  *		I-Cache Aliasing in ARC700 VIPT caches
  *
- * For fetching code from I$, ARC700 uses vaddr (embedded in program code)
- * to "index" into SET of cache-line and paddr from MMU to match the TAG
- * in the WAYS of SET.
+ * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
+ * The orig Cache Management Module "CDU" only required paddr to invalidate a
+ * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
+ * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
+ * the exact same line.
  *
- * However the CDU iterface (to flush/inv) lines from software, only takes
- * paddr (to have simpler hardware interface). For simpler cases, using paddr
- * alone suffices.
- * e.g. 2-way-set-assoc, 16K I$ (8k MMU pg sz, 32b cache line size):
- *      way_sz = cache_sz / num_ways = 16k/2 = 8k
- *      num_sets = way_sz / line_sz = 8k/32 = 256 => 8 bits
- *   Ignoring the bottom 5 bits corresp to the off within a 32b cacheline,
- *   bits req for calc set-index = bits 12:5 (0 based). Since this range fits
- *   inside the bottom 13 bits of paddr, which are same for vaddr and paddr
- *   (with 8k pg sz), paddr alone can be safely used by CDU to unambigously
- *   locate a cache-line.
- *
- * However for a difft sized cache, say 32k I$, above math yields need
- * for 14 bits of vaddr to locate a cache line, which can't be provided by
- * paddr, since the bit 13 (0 based) might differ between the two.
- *
- * This lack of extra bits needed for correct line addressing, defines the
- * classical problem of Cache aliasing with VIPT architectures
- * num_aliases = 1 << extra_bits
- * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz => 2 aliases
- *      2-way-set-assoc, 64K I$ with 8k MMU pg sz => 4 aliases
- *      2-way-set-assoc, 16K I$ with 8k MMU pg sz => NO aliases
+ * However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
+ * paddr alone could not be used to correctly index the cache.
  *
  * ------------------
  * MMU v1/v2 (Fixed Page Size 8k)
  * ------------------
  * The solution was to provide CDU with these additonal vaddr bits. These
- * would be bits [x:13], x would depend on cache-geom.
+ * would be bits [x:13], x would depend on cache-geometry, 13 comes from
+ * standard page size of 8k.
  * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
  * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
  * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
  * represent the offset within cache-line. The adv of using this "clumsy"
- * interface for additional info was no new reg was needed in CDU.
+ * interface for additional info was no new reg was needed in CDU programming
+ * model.
  *
  * 17:13 represented the max num of bits passable, actual bits needed were
  * fewer, based on the num-of-aliases possible.
  * -for 2 alias possibility, only bit 13 needed (32K cache)
  * -for 4 alias possibility, bits 14:13 needed (64K cache)
  *
- * Since vaddr was not available for all instances of I$ flush req by core
- * kernel, the only safe way (non-optimal though) was to kill all possible
- * lines which could represent an alias (even if they didnt represent one
- * in execution).
- * e.g. for 64K I$, 4 aliases possible, so we did
- *      flush start
- *      flush start | 0x01
- *      flush start | 0x2
- *      flush start | 0x3
- *
- * The penalty was invoking the operation itself, since tag match is anyways
- * paddr based, a line which didn't represent an alias would not match the
- * paddr, hence wont be killed
- *
- * Note that aliasing concerns are independent of line-sz for a given cache
- * geometry (size + set_assoc) because the extra bits required by line-sz are
- * reduced from the set calc.
- * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz and using math above
- *  32b line-sz: 9 bits set-index-calc, 5 bits offset-in-line => 1 extra bit
- *  64b line-sz: 8 bits set-index-calc, 6 bits offset-in-line => 1 extra bit
- *
  * ------------------
  * MMU v3
  * ------------------
- * This ver of MMU supports var page sizes (1k-16k) - Linux will support
- * 8k (default), 16k and 4k.
+ * This ver of MMU supports variable page sizes (1k-16k): although Linux will
+ * only support 8k (default), 16k and 4k.
  * However from hardware perspective, smaller page sizes aggrevate aliasing
  * meaning more vaddr bits needed to disambiguate the cache-line-op ;
  * the existing scheme of piggybacking won't work for certain configurations.
@@ -468,115 +397,29 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz,
  */
 
 /***********************************************************
- * Machine specific helpers for per line I-Cache invalidate.
- * 3 routines to accpunt for 1, 2, 4 aliases possible
+ * Machine specific helper for per line I-Cache invalidate.
  */
-
-static void __ic_line_inv_no_alias(unsigned long start, int num_lines)
-{
-	while (num_lines-- > 0) {
-#if (CONFIG_ARC_MMU_VER > 2)
-		write_aux_reg(ARC_REG_IC_PTAG, start);
-#endif
-		write_aux_reg(ARC_REG_IC_IVIL, start);
-		start += ARC_ICACHE_LINE_LEN;
-	}
-}
-
-static void __ic_line_inv_2_alias(unsigned long start, int num_lines)
-{
-	while (num_lines-- > 0) {
-
-#if (CONFIG_ARC_MMU_VER > 2)
-		/*
-		 *  MMU v3, CDU prog model (for line ops) now uses a new IC_PTAG
-		 * reg to pass the "tag" bits and existing IVIL reg only looks
-		 * at bits relevant for "index" (details above)
-		 * Programming Notes:
-		 * -when writing tag to PTAG reg, bit chopping can be avoided,
-		 *  CDU ignores non-tag bits.
-		 * -Ideally "index" must be computed from vaddr, but it is not
-		 *  avail in these rtns. So to be safe, we kill the lines in all
-		 *  possible indexes corresp to num of aliases possible for
-		 *  given cache config.
-		 */
-		write_aux_reg(ARC_REG_IC_PTAG, start);
-		write_aux_reg(ARC_REG_IC_IVIL,
-				  start & ~(0x1 << PAGE_SHIFT));
-		write_aux_reg(ARC_REG_IC_IVIL, start | (0x1 << PAGE_SHIFT));
-#else
-		write_aux_reg(ARC_REG_IC_IVIL, start);
-		write_aux_reg(ARC_REG_IC_IVIL, start | 0x01);
-#endif
-		start += ARC_ICACHE_LINE_LEN;
-	}
-}
-
-static void __ic_line_inv_4_alias(unsigned long start, int num_lines)
-{
-	while (num_lines-- > 0) {
-
-#if (CONFIG_ARC_MMU_VER > 2)
-		write_aux_reg(ARC_REG_IC_PTAG, start);
-
-		write_aux_reg(ARC_REG_IC_IVIL,
-				  start & ~(0x3 << PAGE_SHIFT));
-		write_aux_reg(ARC_REG_IC_IVIL,
-				  start & ~(0x2 << PAGE_SHIFT));
-		write_aux_reg(ARC_REG_IC_IVIL,
-				  start & ~(0x1 << PAGE_SHIFT));
-		write_aux_reg(ARC_REG_IC_IVIL, start | (0x3 << PAGE_SHIFT));
-#else
-		write_aux_reg(ARC_REG_IC_IVIL, start);
-		write_aux_reg(ARC_REG_IC_IVIL, start | 0x01);
-		write_aux_reg(ARC_REG_IC_IVIL, start | 0x02);
-		write_aux_reg(ARC_REG_IC_IVIL, start | 0x03);
-#endif
-		start += ARC_ICACHE_LINE_LEN;
-	}
-}
-
-static void __ic_line_inv(unsigned long start, unsigned long sz)
+static void __ic_line_inv_vaddr(unsigned long phy_start, unsigned long vaddr,
+				unsigned long sz)
 {
 	unsigned long flags;
 	int num_lines, slack;
+	unsigned int addr;
 
 	/*
-	 * Ensure we properly floor/ceil the non-line aligned/sized requests
-	 * and have @start - aligned to cache line, and integral @num_lines
+	 * Ensure we properly floor/ceil the non-line aligned/sized requests:
 	 * However page sized flushes can be compile time optimised.
-	 *  -@start will be cache-line aligned already (being page aligned)
+	 *  -@phy_start will be cache-line aligned already (being page aligned)
 	 *  -@sz will be integral multiple of line size (being page sized).
 	 */
 	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
-		slack = start & ~ICACHE_LINE_MASK;
+		slack = phy_start & ~ICACHE_LINE_MASK;
 		sz += slack;
-		start -= slack;
+		phy_start -= slack;
 	}
 
 	num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
 
-	local_irq_save(flags);
-	(*___flush_icache_rtn) (start, num_lines);
-	local_irq_restore(flags);
-}
-
-/* Unlike routines above, having vaddr for flush op (along with paddr),
- * prevents the need to speculatively kill the lines in multiple sets
- * based on ratio of way_sz : pg_sz
- */
-static void __ic_line_inv_vaddr(unsigned long phy_start,
-					 unsigned long vaddr, unsigned long sz)
-{
-	unsigned long flags;
-	int num_lines, slack;
-	unsigned int addr;
-
-	slack = phy_start & ~ICACHE_LINE_MASK;
-	sz += slack;
-	phy_start -= slack;
-	num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
-
 #if (CONFIG_ARC_MMU_VER > 2)
 	vaddr &= ~ICACHE_LINE_MASK;
 	addr = phy_start;
@@ -595,7 +438,7 @@ static void __ic_line_inv_vaddr(unsigned long phy_start,
 		write_aux_reg(ARC_REG_IC_IVIL, vaddr);
 		vaddr += ARC_ICACHE_LINE_LEN;
 #else
-		/* this paddr contains vaddrs bits as needed */
+		/* paddr contains stuffed vaddrs bits */
 		write_aux_reg(ARC_REG_IC_IVIL, addr);
 #endif
 		addr += ARC_ICACHE_LINE_LEN;
@@ -605,7 +448,6 @@ static void __ic_line_inv_vaddr(unsigned long phy_start,
 
 #else
 
-#define __ic_line_inv(start, sz)
 #define __ic_line_inv_vaddr(pstart, vstart, sz)
 
 #endif /* CONFIG_ARC_HAS_ICACHE */
@@ -615,10 +457,10 @@ static void __ic_line_inv_vaddr(unsigned long phy_start,
  * Exported APIs
  */
 
-/* TBD: use pg_arch_1 to optimize this */
 void flush_dcache_page(struct page *page)
 {
-	__dc_line_op((unsigned long)page_address(page), PAGE_SIZE, OP_FLUSH);
+	/* Make a note that dcache is not yet flushed for this page */
+	set_bit(PG_arch_1, &page->flags);
 }
 EXPORT_SYMBOL(flush_dcache_page);
 
@@ -642,8 +484,8 @@ void dma_cache_wback(unsigned long start, unsigned long sz)
 EXPORT_SYMBOL(dma_cache_wback);
 
 /*
- * This is API for making I/D Caches consistent when modifying code
- * (loadable modules, kprobes,  etc)
+ * This is API for making I/D Caches consistent when modifying
+ * kernel code (loadable modules, kprobes, kgdb...)
  * This is called on insmod, with kernel virtual address for CODE of
  * the module. ARC cache maintenance ops require PHY address thus we
  * need to convert vmalloc addr to PHY addr
@@ -652,7 +494,6 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
 {
 	unsigned int tot_sz, off, sz;
 	unsigned long phy, pfn;
-	unsigned long flags;
 
 	/* printk("Kernel Cache Cohenercy: %lx to %lx\n",kstart, kend); */
 
@@ -673,8 +514,13 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
 
 	/* Case: Kernel Phy addr (0x8000_0000 onwards) */
 	if (likely(kstart > PAGE_OFFSET)) {
-		__ic_line_inv(kstart, kend - kstart);
-		__dc_line_op(kstart, kend - kstart, OP_FLUSH);
+		/*
+		 * The 2nd arg despite being paddr will be used to index icache
+		 * This is OK since no alternate virtual mappings will exist
+		 * given the callers for this case: kprobe/kgdb in built-in
+		 * kernel code only.
+		 */
+		__sync_icache_dcache(kstart, kstart, kend - kstart);
 		return;
 	}
 
@@ -692,42 +538,41 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
 		pfn = vmalloc_to_pfn((void *)kstart);
 		phy = (pfn << PAGE_SHIFT) + off;
 		sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off);
-		local_irq_save(flags);
-		__dc_line_op(phy, sz, OP_FLUSH);
-		__ic_line_inv(phy, sz);
-		local_irq_restore(flags);
+		__sync_icache_dcache(phy, kstart, sz);
 		kstart += sz;
 		tot_sz -= sz;
 	}
 }
 
 /*
- * Optimised ver of flush_icache_range() with spec callers: ptrace/signals
- * where vaddr is also available. This allows passing both vaddr and paddr
- * bits to CDU for cache flush, short-circuting the current pessimistic algo
- * which kills all possible aliases.
- * An added adv of knowing that vaddr is user-vaddr avoids various checks
- * and handling for k-vaddr, k-paddr as done in orig ver above
+ * General purpose helper to make I and D cache lines consistent.
+ * @paddr is phy addr of region
+ * @vaddr is typically user or kernel vaddr (vmalloc)
+ *    Howver in one instance, flush_icache_range() by kprobe (for a breakpt in
+ *    builtin kernel code) @vaddr will be paddr only, meaning CDU operation will
+ *    use a paddr to index the cache (despite VIPT). This is fine since since a
+ *    built-in kernel page will not have any virtual mappings (not even kernel)
+ *    kprobe on loadable module is different as it will have kvaddr.
  */
-void flush_icache_range_vaddr(unsigned long paddr, unsigned long u_vaddr,
-			      int len)
+void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len)
 {
-	__ic_line_inv_vaddr(paddr, u_vaddr, len);
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__ic_line_inv_vaddr(paddr, vaddr, len);
 	__dc_line_op(paddr, len, OP_FLUSH);
+	local_irq_restore(flags);
 }
 
-/*
- * XXX: This also needs to be optim using pg_arch_1
- * This is called when a page-cache page is about to be mapped into a
- * user process' address space.  It offers an opportunity for a
- * port to ensure d-cache/i-cache coherency if necessary.
- */
-void flush_icache_page(struct vm_area_struct *vma, struct page *page)
+/* wrapper to compile time eliminate alignment checks in flush loop */
+void __inv_icache_page(unsigned long paddr, unsigned long vaddr)
 {
-	if (!(vma->vm_flags & VM_EXEC))
-		return;
+	__ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE);
+}
 
-	__ic_line_inv((unsigned long)page_address(page), PAGE_SIZE);
+void __flush_dcache_page(unsigned long paddr)
+{
+	__dc_line_op(paddr, PAGE_SIZE, OP_FLUSH_N_INV);
 }
 
 void flush_icache_all(void)
diff --git a/arch/arc/mm/extable.c b/arch/arc/mm/extable.c
index 014172b..aa652e2 100644
--- a/arch/arc/mm/extable.c
+++ b/arch/arc/mm/extable.c
@@ -27,7 +27,7 @@ int fixup_exception(struct pt_regs *regs)
 
 #ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
 
-long arc_copy_from_user_noinline(void *to, const void __user * from,
+long arc_copy_from_user_noinline(void *to, const void __user *from,
 		unsigned long n)
 {
 	return __arc_copy_from_user(to, from, n);
@@ -48,7 +48,7 @@ unsigned long arc_clear_user_noinline(void __user *to,
 }
 EXPORT_SYMBOL(arc_clear_user_noinline);
 
-long arc_strncpy_from_user_noinline (char *dst, const char __user *src,
+long arc_strncpy_from_user_noinline(char *dst, const char __user *src,
 		long count)
 {
 	return __arc_strncpy_from_user(dst, src, count);
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index af55aab..689ffd8 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -12,7 +12,6 @@
 #include <linux/sched.h>
 #include <linux/errno.h>
 #include <linux/ptrace.h>
-#include <linux/version.h>
 #include <linux/uaccess.h>
 #include <linux/kdebug.h>
 #include <asm/pgalloc.h>
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 727d479..4a17736 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -10,9 +10,6 @@
 #include <linux/mm.h>
 #include <linux/bootmem.h>
 #include <linux/memblock.h>
-#ifdef CONFIG_BLOCK_DEV_RAM
-#include <linux/blk.h>
-#endif
 #include <linux/swap.h>
 #include <linux/module.h>
 #include <asm/page.h>
diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c
index 3e5c92c..739e65f 100644
--- a/arch/arc/mm/ioremap.c
+++ b/arch/arc/mm/ioremap.c
@@ -12,7 +12,7 @@
 #include <linux/io.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
-#include <asm/cache.h>
+#include <linux/cache.h>
 
 void __iomem *ioremap(unsigned long paddr, unsigned long size)
 {
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 9b9ce23..003d69a 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -418,23 +418,37 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 	local_irq_restore(flags);
 }
 
-/* arch hook called by core VM at the end of handle_mm_fault( ),
- * when a new PTE is entered in Page Tables or an existing one
- * is modified. We aggresively pre-install a TLB entry
+/*
+ * Called at the end of pagefault, for a userspace mapped page
+ *  -pre-install the corresponding TLB entry into MMU
+ *  -Finalize the delayed D-cache flush (wback+inv kernel mapping)
  */
-
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddress,
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
 		      pte_t *ptep)
 {
+	unsigned long vaddr = vaddr_unaligned & PAGE_MASK;
+
+	create_tlb(vma, vaddr, ptep);
 
-	create_tlb(vma, vaddress, ptep);
+	/* icache doesn't snoop dcache, thus needs to be made coherent here */
+	if (vma->vm_flags & VM_EXEC) {
+		struct page *page = pfn_to_page(pte_pfn(*ptep));
+
+		/* if page was dcache dirty, flush now */
+		int dirty = test_and_clear_bit(PG_arch_1, &page->flags);
+		if (dirty) {
+			unsigned long paddr =  pte_val(*ptep) & PAGE_MASK;
+			__flush_dcache_page(paddr);
+			__inv_icache_page(paddr, vaddr);
+		}
+	}
 }
 
 /* Read the Cache Build Confuration Registers, Decode them and save into
  * the cpuinfo structure for later use.
  * No Validation is done here, simply read/convert the BCRs
  */
-void __init read_decode_mmu_bcr(void)
+void __cpuinit read_decode_mmu_bcr(void)
 {
 	unsigned int tmp;
 	struct bcr_mmu_1_2 *mmu2;	/* encoded MMU2 attr */
@@ -466,7 +480,7 @@ void __init read_decode_mmu_bcr(void)
 char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
 {
 	int n = 0;
-	struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+	struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu;
 
 	n += scnprintf(buf + n, len - n, "ARC700 MMU [v%x]\t: %dk PAGE, ",
 		       p_mmu->ver, TO_KB(p_mmu->pg_sz));
@@ -480,7 +494,7 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
 	return buf;
 }
 
-void __init arc_mmu_init(void)
+void __cpuinit arc_mmu_init(void)
 {
 	char str[256];
 	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
diff --git a/arch/arc/plat-arcfpga/platform.c b/arch/arc/plat-arcfpga/platform.c
index 4e20a1a..b3700c0 100644
--- a/arch/arc/plat-arcfpga/platform.c
+++ b/arch/arc/plat-arcfpga/platform.c
@@ -224,3 +224,15 @@ MACHINE_START(ML509, "ml509")
 	.init_smp	= iss_model_init_smp,
 #endif
 MACHINE_END
+
+static const char *nsimosci_compat[] __initdata = {
+	"snps,nsimosci",
+	NULL,
+};
+
+MACHINE_START(NSIMOSCI, "nsimosci")
+	.dt_compat	= nsimosci_compat,
+	.init_early	= NULL,
+	.init_machine	= plat_fpga_populate_dev,
+	.init_irq	= NULL,
+MACHINE_END
diff --git a/arch/arc/plat-tb10x/Kconfig b/arch/arc/plat-tb10x/Kconfig
new file mode 100644
index 0000000..4e12127
--- /dev/null
+++ b/arch/arc/plat-tb10x/Kconfig
@@ -0,0 +1,36 @@
+# Abilis Systems TB10x platform kernel configuration file
+#
+# Author: Christian Ruppert <christian.ruppert@abilis.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+
+
+menuconfig ARC_PLAT_TB10X
+	bool "Abilis TB10x"
+	select COMMON_CLK
+	select PINCTRL
+	select PINMUX
+	select ARCH_REQUIRE_GPIOLIB
+	help
+	  Support for platforms based on the TB10x home media gateway SOC by
+	  Abilis Systems. TB10x is based on the ARC700 CPU architecture.
+	  Say Y if you are building a kernel for one of the SOCs in this
+	  series (e.g. TB100 or TB101). If in doubt say N.
+
+if ARC_PLAT_TB10X
+
+config GENERIC_GPIO
+	def_bool y
+
+endif
diff --git a/arch/arc/plat-tb10x/Makefile b/arch/arc/plat-tb10x/Makefile
new file mode 100644
index 0000000..89611d2
--- /dev/null
+++ b/arch/arc/plat-tb10x/Makefile
@@ -0,0 +1,21 @@
+# Abilis Systems TB10x platform Makefile
+#
+# Author: Christian Ruppert <christian.ruppert@abilis.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+
+
+KBUILD_CFLAGS   += -Iarch/arc/plat-tb10x/include
+
+obj-y += tb10x.o
diff --git a/arch/arc/plat-tb10x/tb10x.c b/arch/arc/plat-tb10x/tb10x.c
new file mode 100644
index 0000000..d356769
--- /dev/null
+++ b/arch/arc/plat-tb10x/tb10x.c
@@ -0,0 +1,71 @@
+/*
+ * Abilis Systems TB10x platform initialisation
+ *
+ * Copyright (C) Abilis Systems 2012
+ *
+ * Author: Christian Ruppert <christian.ruppert@abilis.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <linux/clk-provider.h>
+#include <linux/pinctrl/consumer.h>
+
+#include <asm/mach_desc.h>
+
+
+static void __init tb10x_platform_init(void)
+{
+	of_clk_init(NULL);
+	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
+}
+
+static void __init tb10x_platform_late_init(void)
+{
+	struct device_node *dn;
+
+	/*
+	 * Pinctrl documentation recommends setting up the iomux here for
+	 * all modules which don't require control over the pins themselves.
+	 * Modules which need this kind of assistance are compatible with
+	 * "abilis,simple-pinctrl", i.e. we can easily iterate over them.
+	 * TODO: Does this recommended method work cleanly with pins required
+	 * by modules?
+	 */
+	for_each_compatible_node(dn, NULL, "abilis,simple-pinctrl") {
+		struct platform_device *pd = of_find_device_by_node(dn);
+		struct pinctrl *pctl;
+
+		pctl = pinctrl_get_select(&pd->dev, "abilis,simple-default");
+		if (IS_ERR(pctl)) {
+			int ret = PTR_ERR(pctl);
+			dev_err(&pd->dev, "Could not set up pinctrl: %d\n",
+				ret);
+		}
+	}
+}
+
+static const char *tb10x_compat[] __initdata = {
+	"abilis,arc-tb10x",
+	NULL,
+};
+
+MACHINE_START(TB10x, "tb10x")
+	.dt_compat	= tb10x_compat,
+	.init_machine	= tb10x_platform_init,
+	.init_late	= tb10x_platform_late_init,
+MACHINE_END
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 5b6a40d..3aa3766 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -355,11 +355,17 @@ config HARDWALL
 config KERNEL_PL
 	int "Processor protection level for kernel"
 	range 1 2
-	default "1"
+	default 2 if TILEGX
+	default 1 if !TILEGX
 	---help---
-	  This setting determines the processor protection level the
-	  kernel will be built to run at.  Generally you should use
-	  the default value here.
+	  Since MDE 4.2, the Tilera hypervisor runs the kernel
+	  at PL2 by default.  If running under an older hypervisor,
+	  or as a KVM guest, you must run at PL1.  (The current
+	  hypervisor may also be recompiled with "make HV_PL=2" to
+	  allow it to run a kernel at PL1, but clients running at PL1
+	  are not expected to be supported indefinitely.)
+
+	  If you're not sure, don't change the default.
 
 source "arch/tile/gxio/Kconfig"
 
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
index ccd847e..837dca5 100644
--- a/arch/tile/include/hv/hypervisor.h
+++ b/arch/tile/include/hv/hypervisor.h
@@ -107,7 +107,22 @@
 #define HV_DISPATCH_ENTRY_SIZE 32
 
 /** Version of the hypervisor interface defined by this file */
-#define _HV_VERSION 11
+#define _HV_VERSION 13
+
+/** Last version of the hypervisor interface with old hv_init() ABI.
+ *
+ * The change from version 12 to version 13 corresponds to launching
+ * the client by default at PL2 instead of PL1 (corresponding to the
+ * hv itself running at PL3 instead of PL2).  To make this explicit,
+ * the hv_init() API was also extended so the client can report its
+ * desired PL, resulting in a more helpful failure diagnostic.  If you
+ * call hv_init() with _HV_VERSION_OLD_HV_INIT and omit the client_pl
+ * argument, the hypervisor will assume client_pl = 1.
+ *
+ * Note that this is a deprecated solution and we do not expect to
+ * support clients of the Tilera hypervisor running at PL1 indefinitely.
+ */
+#define _HV_VERSION_OLD_HV_INIT 12
 
 /* Index into hypervisor interface dispatch code blocks.
  *
@@ -377,7 +392,11 @@ typedef int HV_Errno;
 #ifndef __ASSEMBLER__
 
 /** Pass HV_VERSION to hv_init to request this version of the interface. */
-typedef enum { HV_VERSION = _HV_VERSION } HV_VersionNumber;
+typedef enum {
+  HV_VERSION = _HV_VERSION,
+  HV_VERSION_OLD_HV_INIT = _HV_VERSION_OLD_HV_INIT,
+
+} HV_VersionNumber;
 
 /** Initializes the hypervisor.
  *
@@ -385,9 +404,11 @@ typedef enum { HV_VERSION = _HV_VERSION } HV_VersionNumber;
  * that this program expects, typically HV_VERSION.
  * @param chip_num Architecture number of the chip the client was built for.
  * @param chip_rev_num Revision number of the chip the client was built for.
+ * @param client_pl Privilege level the client is built for
+ *   (not required if interface_version_number == HV_VERSION_OLD_HV_INIT).
  */
 void hv_init(HV_VersionNumber interface_version_number,
-             int chip_num, int chip_rev_num);
+             int chip_num, int chip_rev_num, int client_pl);
 
 
 /** Queries we can make for hv_sysconf().
diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S
index f71bfee..ac11530 100644
--- a/arch/tile/kernel/head_32.S
+++ b/arch/tile/kernel/head_32.S
@@ -38,7 +38,7 @@ ENTRY(_start)
 	  movei r2, TILE_CHIP_REV
 	}
 	{
-	  moveli r0, _HV_VERSION
+	  moveli r0, _HV_VERSION_OLD_HV_INIT
 	  jal hv_init
 	}
 	/* Get a reasonable default ASID in r0 */
diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S
index f9a2734..6093964 100644
--- a/arch/tile/kernel/head_64.S
+++ b/arch/tile/kernel/head_64.S
@@ -34,13 +34,19 @@
 ENTRY(_start)
 	/* Notify the hypervisor of what version of the API we want */
 	{
+#if KERNEL_PL == 1 && _HV_VERSION == 13
+	  /* Support older hypervisors by asking for API version 12. */
+	  movei r0, _HV_VERSION_OLD_HV_INIT
+#else
+	  movei r0, _HV_VERSION
+#endif
 	  movei r1, TILE_CHIP
-	  movei r2, TILE_CHIP_REV
 	}
 	{
-	  moveli r0, _HV_VERSION
-	  jal hv_init
+	  movei r2, TILE_CHIP_REV
+	  movei r3, KERNEL_PL
 	}
+	jal hv_init
 	/* Get a reasonable default ASID in r0 */
 	{
 	  move r0, zero
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
index b16ac49..b34f79a 100644
--- a/arch/tile/lib/spinlock_32.c
+++ b/arch/tile/lib/spinlock_32.c
@@ -101,7 +101,7 @@ EXPORT_SYMBOL(arch_spin_unlock_wait);
  * preserve the semantic that the same read lock can be acquired in an
  * interrupt context.
  */
-inline int arch_read_trylock(arch_rwlock_t *rwlock)
+int arch_read_trylock(arch_rwlock_t *rwlock)
 {
 	u32 val;
 	__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1);
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index acdfc61..0a1b95f 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -1,11 +1,9 @@
-config FRAME_POINTER
-	def_bool n
-
 config ZONE_DMA
 	def_bool y
 
 config XTENSA
 	def_bool y
+	select ARCH_WANT_FRAME_POINTERS
 	select HAVE_IDE
 	select GENERIC_ATOMIC64
 	select HAVE_GENERIC_HARDIRQS
@@ -49,6 +47,15 @@ config HZ
 source "init/Kconfig"
 source "kernel/Kconfig.freezer"
 
+config LOCKDEP_SUPPORT
+	def_bool y
+
+config STACKTRACE_SUPPORT
+	def_bool y
+
+config TRACE_IRQFLAGS_SUPPORT
+	def_bool y
+
 config MMU
 	def_bool n
 
@@ -100,6 +107,35 @@ config MATH_EMULATION
 	help
 	Can we use information of configuration file?
 
+config INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
+	bool "Initialize Xtensa MMU inside the Linux kernel code"
+	default y
+	help
+	  Earlier version initialized the MMU in the exception vector
+	  before jumping to _startup in head.S and had an advantage that
+	  it was possible to place a software breakpoint at 'reset' and
+	  then enter your normal kernel breakpoints once the MMU was mapped
+	  to the kernel mappings (0XC0000000).
+
+	  This unfortunately doesn't work for U-Boot and likley also wont
+	  work for using KEXEC to have a hot kernel ready for doing a
+	  KDUMP.
+
+	  So now the MMU is initialized in head.S but it's necessary to
+	  use hardware breakpoints (gdb 'hbreak' cmd) to break at _startup.
+	  xt-gdb can't place a Software Breakpoint in the  0XD region prior
+	  to mapping the MMU and after mapping even if the area of low memory
+	  was mapped gdb wouldn't remove the breakpoint on hitting it as the
+	  PC wouldn't match. Since Hardware Breakpoints are recommended for
+	  Linux configurations it seems reasonable to just assume they exist
+	  and leave this older mechanism for unfortunate souls that choose
+	  not to follow Tensilica's recommendation.
+
+	  Selecting this will cause U-Boot to set the KERNEL Load and Entry
+	  address at 0x00003000 instead of the mapped std of 0xD0003000.
+
+	  If in doubt, say Y.
+
 endmenu
 
 config XTENSA_CALIBRATE_CCOUNT
@@ -249,21 +285,6 @@ endmenu
 
 menu "Executable file formats"
 
-# only elf supported
-config KCORE_ELF
-	def_bool y
-        depends on PROC_FS
-        help
-          If you enabled support for /proc file system then the file
-          /proc/kcore will contain the kernel core image in ELF format. This
-          can be used in gdb:
-
-          $ cd /usr/src/linux ; gdb vmlinux /proc/kcore
-
-          This is especially useful if you have compiled the kernel with the
-          "-g" option to preserve debugging information. It is mainly used
-	  for examining kernel data structures on the live kernel.
-
 source "fs/Kconfig.binfmt"
 
 endmenu
diff --git a/arch/xtensa/boot/boot-elf/Makefile b/arch/xtensa/boot/boot-elf/Makefile
index 1fe01b7..89db089 100644
--- a/arch/xtensa/boot/boot-elf/Makefile
+++ b/arch/xtensa/boot/boot-elf/Makefile
@@ -12,6 +12,7 @@ endif
 
 export OBJCOPY_ARGS
 export CPPFLAGS_boot.lds += -P -C
+export KBUILD_AFLAGS += -mtext-section-literals
 
 boot-y		:= bootstrap.o
 
diff --git a/arch/xtensa/boot/boot-elf/boot.lds.S b/arch/xtensa/boot/boot-elf/boot.lds.S
index 7b646e0..932b58e 100644
--- a/arch/xtensa/boot/boot-elf/boot.lds.S
+++ b/arch/xtensa/boot/boot-elf/boot.lds.S
@@ -1,41 +1,29 @@
-#include <variant/core.h>
+/*
+ *  linux/arch/xtensa/boot/boot-elf/boot.lds.S
+ *
+ *  Copyright (C) 2008 - 2013 by Tensilica Inc.
+ *
+ *  Chris Zankel <chris@zankel.net>
+ *  Marc Gauthier <marc@tensilica.com
+ *  Pete Delaney <piet@tensilica.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/vectors.h>
 OUTPUT_ARCH(xtensa)
 ENTRY(_ResetVector)
 
 SECTIONS
 {
-	.start 0xD0000000 : { *(.start) }
-
-	.text 0xD0000000:
-	{
-		__reloc_start = . ;
-		_text_start = . ;
-		*(.literal .text.literal .text)
-		_text_end = . ;
-	}
-
-	.rodata ALIGN(0x04):
-	{
-		*(.rodata)
-		*(.rodata1)
-	}
-
-	.data ALIGN(0x04):
+	.ResetVector.text XCHAL_RESET_VECTOR_VADDR :
 	{
-		*(.data)
-		*(.data1)
-		*(.sdata)
-		*(.sdata2)
-		*(.got.plt)
-		*(.got)
-		*(.dynamic)
+		*(.ResetVector.text)
 	}
 
-	__reloc_end = . ;
-
-	. = ALIGN(0x10);
-	__image_load = . ;
-	.image 0xd0001000:
+	.image KERNELOFFSET: AT (LOAD_MEMORY_ADDRESS)
 	{
 		_image_start = .;
 		*(image)
@@ -43,7 +31,6 @@ SECTIONS
 		_image_end = .	;
 	}
 
-
 	.bss ((LOADADDR(.image) + SIZEOF(.image) + 3) & ~ 3):
 	{
 		__bss_start = .;
@@ -53,14 +40,15 @@ SECTIONS
 		*(.bss)
 		__bss_end = .;
 	}
-	_end = .;
-	_param_start = .;
 
-	.ResetVector.text XCHAL_RESET_VECTOR_VADDR :
+	/*
+	 * This is a remapped copy of the Reset Vector Code.
+	 * It keeps gdb in sync with the PC after switching
+	 * to the temporary mapping used while setting up
+	 * the V2 MMU mappings for Linux.
+	 */
+	.ResetVector.remapped_text 0x46000000 (INFO):
 	{
-		*(.ResetVector.text)
+		*(.ResetVector.remapped_text)
 	}
-
-
-	PROVIDE (end = .);
 }
diff --git a/arch/xtensa/boot/boot-elf/bootstrap.S b/arch/xtensa/boot/boot-elf/bootstrap.S
index 464298b..1388a49 100644
--- a/arch/xtensa/boot/boot-elf/bootstrap.S
+++ b/arch/xtensa/boot/boot-elf/bootstrap.S
@@ -1,18 +1,77 @@
+/*
+ * arch/xtensa/boot/boot-elf/bootstrap.S
+ *
+ * Low-level exception handling
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004 - 2013 by Tensilica Inc.
+ *
+ * Chris Zankel <chris@zankel.net>
+ * Marc Gauthier <marc@tensilica.com>
+ * Piet Delaney <piet@tensilica.com>
+ */
 
 #include <asm/bootparam.h>
+#include <asm/processor.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/cacheasm.h>
+#include <asm/initialize_mmu.h>
+#include <linux/linkage.h>
 
-
-/* ResetVector
- */
-	.section        .ResetVector.text, "ax"
+	.section	.ResetVector.text, "ax"
 	.global         _ResetVector
+	.global         reset
+
 _ResetVector:
-	_j reset
+	_j _SetupMMU
+
+	.begin  no-absolute-literals
+	.literal_position
+
 	.align 4
 RomInitAddr:
-	.word 0xd0001000
+#if defined(CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX) && \
+	XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY
+	.word 0x00003000
+#else
+	.word 0xd0003000
+#endif
 RomBootParam:
 	.word _bootparam
+_bootparam:
+	.short	BP_TAG_FIRST
+	.short	4
+	.long	BP_VERSION
+	.short	BP_TAG_LAST
+	.short	0
+	.long	0
+
+	.align  4
+_SetupMMU:
+	movi	a0, 0
+	wsr	a0, windowbase
+	rsync
+	movi	a0, 1
+	wsr	a0, windowstart
+	rsync
+	movi	a0, 0x1F
+	wsr	a0, ps
+	rsync
+
+	Offset = _SetupMMU - _ResetVector
+
+#ifndef CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
+	initialize_mmu
+#endif
+
+	.end    no-absolute-literals
+
+	rsil    a0, XCHAL_DEBUGLEVEL-1
+	rsync
 reset:
 	l32r    a0, RomInitAddr
 	l32r	a2, RomBootParam
@@ -21,13 +80,25 @@ reset:
 	jx      a0
 
 	.align 4
-	.section .bootstrap.data, "aw"
 
-	.globl _bootparam
-_bootparam:
-	.short	BP_TAG_FIRST
-	.short	4
-	.long	BP_VERSION
-	.short	BP_TAG_LAST
-	.short	0
-	.long	0
+	.section	.ResetVector.remapped_text, "x"
+	.global         _RemappedResetVector
+
+	/* Do org before literals */
+	.org 0
+
+_RemappedResetVector:
+	.begin  no-absolute-literals
+	.literal_position
+
+	_j	_RemappedSetupMMU
+
+	/* Position Remapped code at the same location as the original code */
+	. = _RemappedResetVector + Offset
+
+_RemappedSetupMMU:
+#ifndef CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
+	initialize_mmu
+#endif
+
+	.end    no-absolute-literals
diff --git a/arch/xtensa/boot/boot-redboot/boot.ld b/arch/xtensa/boot/boot-redboot/boot.ld
index 5bbcaf9..b0b9e95 100644
--- a/arch/xtensa/boot/boot-redboot/boot.ld
+++ b/arch/xtensa/boot/boot-redboot/boot.ld
@@ -33,7 +33,7 @@ SECTIONS
 
 	. = ALIGN(0x10);
 	__image_load = . ;
-	.image 0xd0001000: AT(__image_load)
+	.image 0xd0003000: AT(__image_load)
 	{
 		_image_start = .;
 		*(image)
diff --git a/arch/xtensa/boot/boot-uboot/Makefile b/arch/xtensa/boot/boot-uboot/Makefile
index bfbf8af..5457598 100644
--- a/arch/xtensa/boot/boot-uboot/Makefile
+++ b/arch/xtensa/boot/boot-uboot/Makefile
@@ -4,7 +4,11 @@
 # for more details.
 #
 
-UIMAGE_LOADADDR = 0xd0001000
+ifdef CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
+UIMAGE_LOADADDR = 0x00003000
+else
+UIMAGE_LOADADDR = 0xd0003000
+endif
 UIMAGE_COMPRESSION = gzip
 
 $(obj)/../uImage: vmlinux.bin.gz FORCE
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 095f0a2..1b98264 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -15,6 +15,7 @@ generic-y += irq_regs.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kvm_para.h
+generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += percpu.h
diff --git a/arch/xtensa/include/asm/ftrace.h b/arch/xtensa/include/asm/ftrace.h
index 40a8c17..36dc7a6 100644
--- a/arch/xtensa/include/asm/ftrace.h
+++ b/arch/xtensa/include/asm/ftrace.h
@@ -1 +1,33 @@
-/* empty */
+/*
+ * arch/xtensa/include/asm/ftrace.h
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2013 Tensilica Inc.
+ */
+#ifndef _XTENSA_FTRACE_H
+#define _XTENSA_FTRACE_H
+
+#include <asm/processor.h>
+
+#define HAVE_ARCH_CALLER_ADDR
+#define CALLER_ADDR0 ({ unsigned long a0, a1; \
+		__asm__ __volatile__ ( \
+			"mov %0, a0\n" \
+			"mov %1, a1\n" \
+			: "=r"(a0), "=r"(a1) : : ); \
+		MAKE_PC_FROM_RA(a0, a1); })
+#ifdef CONFIG_FRAME_POINTER
+extern unsigned long return_address(unsigned level);
+#define CALLER_ADDR1 return_address(1)
+#define CALLER_ADDR2 return_address(2)
+#define CALLER_ADDR3 return_address(3)
+#else
+#define CALLER_ADDR1 (0)
+#define CALLER_ADDR2 (0)
+#define CALLER_ADDR3 (0)
+#endif
+
+#endif /* _XTENSA_FTRACE_H */
diff --git a/arch/xtensa/include/asm/initialize_mmu.h b/arch/xtensa/include/asm/initialize_mmu.h
index e1f8ba4..722553f 100644
--- a/arch/xtensa/include/asm/initialize_mmu.h
+++ b/arch/xtensa/include/asm/initialize_mmu.h
@@ -23,6 +23,9 @@
 #ifndef _XTENSA_INITIALIZE_MMU_H
 #define _XTENSA_INITIALIZE_MMU_H
 
+#include <asm/pgtable.h>
+#include <asm/vectors.h>
+
 #ifdef __ASSEMBLY__
 
 #define XTENSA_HWVERSION_RC_2009_0 230000
@@ -48,6 +51,110 @@
 	 * (XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RC_2009_0)
 	 */
 
+#if defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY
+/*
+ * Have MMU v3
+ */
+
+#if !XCHAL_HAVE_VECBASE
+# error "MMU v3 requires reloc vectors"
+#endif
+
+	movi	a1, 0
+	_call0	1f
+	_j	2f
+
+	.align	4
+1:	movi	a2, 0x10000000
+	movi	a3, 0x18000000
+	add	a2, a2, a0
+9:	bgeu	a2, a3, 9b	/* PC is out of the expected range */
+
+	/* Step 1: invalidate mapping at 0x40000000..0x5FFFFFFF. */
+
+	movi	a2, 0x40000006
+	idtlb	a2
+	iitlb	a2
+	isync
+
+	/* Step 2: map 0x40000000..0x47FFFFFF to paddr containing this code
+	 * and jump to the new mapping.
+	 */
+#define CA_BYPASS	(_PAGE_CA_BYPASS | _PAGE_HW_WRITE | _PAGE_HW_EXEC)
+#define CA_WRITEBACK	(_PAGE_CA_WB     | _PAGE_HW_WRITE | _PAGE_HW_EXEC)
+
+	srli	a3, a0, 27
+	slli	a3, a3, 27
+	addi	a3, a3, CA_BYPASS
+	addi	a7, a2, -1
+	wdtlb	a3, a7
+	witlb	a3, a7
+	isync
+
+	slli	a4, a0, 5
+	srli	a4, a4, 5
+	addi	a5, a2, -6
+	add	a4, a4, a5
+	jx	a4
+
+	/* Step 3: unmap everything other than current area.
+	 *	   Start at 0x60000000, wrap around, and end with 0x20000000
+	 */
+2:	movi	a4, 0x20000000
+	add	a5, a2, a4
+3:	idtlb	a5
+	iitlb	a5
+	add	a5, a5, a4
+	bne	a5, a2, 3b
+
+	/* Step 4: Setup MMU with the old V2 mappings. */
+	movi	a6, 0x01000000
+	wsr	a6, ITLBCFG
+	wsr	a6, DTLBCFG
+	isync
+
+	movi	a5, 0xd0000005
+	movi	a4, CA_WRITEBACK
+	wdtlb	a4, a5
+	witlb	a4, a5
+
+	movi	a5, 0xd8000005
+	movi	a4, CA_BYPASS
+	wdtlb	a4, a5
+	witlb	a4, a5
+
+	movi	a5, 0xe0000006
+	movi	a4, 0xf0000000 + CA_WRITEBACK
+	wdtlb	a4, a5
+	witlb	a4, a5
+
+	movi	a5, 0xf0000006
+	movi	a4, 0xf0000000 + CA_BYPASS
+	wdtlb	a4, a5
+	witlb	a4, a5
+
+	isync
+
+	/* Jump to self, using MMU v2 mappings. */
+	movi	a4, 1f
+	jx	a4
+
+1:
+	movi    a2, VECBASE_RESET_VADDR
+	wsr	a2, vecbase
+
+	/* Step 5: remove temporary mapping. */
+	idtlb	a7
+	iitlb	a7
+	isync
+
+	movi	a0, 0
+	wsr	a0, ptevaddr
+	rsync
+
+#endif /* defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU &&
+	  XCHAL_HAVE_SPANNING_WAY */
+
 	.endm
 
 #endif /*__ASSEMBLY__*/
diff --git a/arch/xtensa/include/asm/irqflags.h b/arch/xtensa/include/asm/irqflags.h
index f865b1c..ea36674 100644
--- a/arch/xtensa/include/asm/irqflags.h
+++ b/arch/xtensa/include/asm/irqflags.h
@@ -47,7 +47,10 @@ static inline void arch_local_irq_restore(unsigned long flags)
 
 static inline bool arch_irqs_disabled_flags(unsigned long flags)
 {
-	return (flags & 0xf) != 0;
+#if XCHAL_EXCM_LEVEL < LOCKLEVEL || (1 << PS_EXCM_BIT) < LOCKLEVEL
+#error "XCHAL_EXCM_LEVEL and 1<<PS_EXCM_BIT must be no less than LOCKLEVEL"
+#endif
+	return (flags & (PS_INTLEVEL_MASK | (1 << PS_EXCM_BIT))) >= LOCKLEVEL;
 }
 
 static inline bool arch_irqs_disabled(void)
diff --git a/arch/xtensa/include/asm/linkage.h b/arch/xtensa/include/asm/linkage.h
deleted file mode 100644
index bf2128a..0000000
--- a/arch/xtensa/include/asm/linkage.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * include/asm-xtensa/linkage.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
-
-#ifndef _XTENSA_LINKAGE_H
-#define _XTENSA_LINKAGE_H
-
-/* Nothing to do here ... */
-
-#endif	/* _XTENSA_LINKAGE_H */
diff --git a/arch/xtensa/include/asm/stacktrace.h b/arch/xtensa/include/asm/stacktrace.h
new file mode 100644
index 0000000..6a05fcb
--- /dev/null
+++ b/arch/xtensa/include/asm/stacktrace.h
@@ -0,0 +1,36 @@
+/*
+ * arch/xtensa/include/asm/stacktrace.h
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2001 - 2013 Tensilica Inc.
+ */
+#ifndef _XTENSA_STACKTRACE_H
+#define _XTENSA_STACKTRACE_H
+
+#include <linux/sched.h>
+
+struct stackframe {
+	unsigned long pc;
+	unsigned long sp;
+};
+
+static __always_inline unsigned long *stack_pointer(struct task_struct *task)
+{
+	unsigned long *sp;
+
+	if (!task || task == current)
+		__asm__ __volatile__ ("mov %0, a1\n" : "=a"(sp));
+	else
+		sp = (unsigned long *)task->thread.sp;
+
+	return sp;
+}
+
+void walk_stackframe(unsigned long *sp,
+		int (*fn)(struct stackframe *frame, void *data),
+		void *data);
+
+#endif /* _XTENSA_STACKTRACE_H */
diff --git a/arch/xtensa/include/asm/timex.h b/arch/xtensa/include/asm/timex.h
index 9e85ce8..3d35e5d 100644
--- a/arch/xtensa/include/asm/timex.h
+++ b/arch/xtensa/include/asm/timex.h
@@ -19,13 +19,16 @@
 #define _INTLEVEL(x)	XCHAL_INT ## x ## _LEVEL
 #define INTLEVEL(x)	_INTLEVEL(x)
 
-#if INTLEVEL(XCHAL_TIMER0_INTERRUPT) <= XCHAL_EXCM_LEVEL
+#if XCHAL_NUM_TIMERS > 0 && \
+	INTLEVEL(XCHAL_TIMER0_INTERRUPT) <= XCHAL_EXCM_LEVEL
 # define LINUX_TIMER     0
 # define LINUX_TIMER_INT XCHAL_TIMER0_INTERRUPT
-#elif INTLEVEL(XCHAL_TIMER1_INTERRUPT) <= XCHAL_EXCM_LEVEL
+#elif XCHAL_NUM_TIMERS > 1 && \
+	INTLEVEL(XCHAL_TIMER1_INTERRUPT) <= XCHAL_EXCM_LEVEL
 # define LINUX_TIMER     1
 # define LINUX_TIMER_INT XCHAL_TIMER1_INTERRUPT
-#elif INTLEVEL(XCHAL_TIMER2_INTERRUPT) <= XCHAL_EXCM_LEVEL
+#elif XCHAL_NUM_TIMERS > 2 && \
+	INTLEVEL(XCHAL_TIMER2_INTERRUPT) <= XCHAL_EXCM_LEVEL
 # define LINUX_TIMER     2
 # define LINUX_TIMER_INT XCHAL_TIMER2_INTERRUPT
 #else
diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index b5464ef..917488a 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -22,10 +22,9 @@ extern void do_unhandled(struct pt_regs *regs, unsigned long exccause);
 
 static inline void spill_registers(void)
 {
-	unsigned int a0, ps;
 
 	__asm__ __volatile__ (
-		"movi	a14, " __stringify(PS_EXCM_BIT | LOCKLEVEL) "\n\t"
+		"movi	a14, "__stringify((1 << PS_EXCM_BIT) | LOCKLEVEL)"\n\t"
 		"mov	a12, a0\n\t"
 		"rsr	a13, sar\n\t"
 		"xsr	a14, ps\n\t"
@@ -35,7 +34,7 @@ static inline void spill_registers(void)
 		"mov	a0, a12\n\t"
 		"wsr	a13, sar\n\t"
 		"wsr	a14, ps\n\t"
-		: : "a" (&a0), "a" (&ps)
+		: :
 #if defined(CONFIG_FRAME_POINTER)
 		: "a2", "a3", "a4",       "a11", "a12", "a13", "a14", "a15",
 #else
diff --git a/arch/xtensa/include/asm/vectors.h b/arch/xtensa/include/asm/vectors.h
new file mode 100644
index 0000000..c52b656
--- /dev/null
+++ b/arch/xtensa/include/asm/vectors.h
@@ -0,0 +1,125 @@
+/*
+ * arch/xtensa/include/asm/xchal_vaddr_remap.h
+ *
+ * Xtensa macros for MMU V3 Support. Deals with re-mapping the Virtual
+ * Memory Addresses from "Virtual == Physical" to their prevvious V2 MMU
+ * mappings (KSEG at 0xD0000000 and KIO at 0XF0000000).
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2008 - 2012 Tensilica Inc.
+ *
+ * Pete Delaney <piet@tensilica.com>
+ * Marc Gauthier <marc@tensilica.com
+ */
+
+#ifndef _XTENSA_VECTORS_H
+#define _XTENSA_VECTORS_H
+
+#include <variant/core.h>
+
+#if defined(CONFIG_MMU)
+
+/* Will Become VECBASE */
+#define VIRTUAL_MEMORY_ADDRESS		0xD0000000
+
+/* Image Virtual Start Address */
+#define KERNELOFFSET			0xD0003000
+
+#if defined(XCHAL_HAVE_PTP_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY
+  /* MMU v3  - XCHAL_HAVE_PTP_MMU  == 1 */
+  #define PHYSICAL_MEMORY_ADDRESS	0x00000000
+  #define LOAD_MEMORY_ADDRESS		0x00003000
+#else
+  /* MMU V2 -  XCHAL_HAVE_PTP_MMU  == 0 */
+  #define PHYSICAL_MEMORY_ADDRESS	0xD0000000
+  #define LOAD_MEMORY_ADDRESS		0xD0003000
+#endif
+
+#else /* !defined(CONFIG_MMU) */
+  /* MMU Not being used - Virtual == Physical */
+
+  /* VECBASE */
+  #define VIRTUAL_MEMORY_ADDRESS	0x00002000
+
+  /* Location of the start of the kernel text, _start */
+  #define KERNELOFFSET			0x00003000
+  #define PHYSICAL_MEMORY_ADDRESS	0x00000000
+
+  /* Loaded just above possibly live vectors */
+  #define LOAD_MEMORY_ADDRESS		0x00003000
+
+#endif /* CONFIG_MMU */
+
+#define XC_VADDR(offset)		(VIRTUAL_MEMORY_ADDRESS  + offset)
+#define XC_PADDR(offset)		(PHYSICAL_MEMORY_ADDRESS + offset)
+
+/* Used to set VECBASE register */
+#define VECBASE_RESET_VADDR		VIRTUAL_MEMORY_ADDRESS
+
+#define RESET_VECTOR_VECOFS		(XCHAL_RESET_VECTOR_VADDR - \
+						VECBASE_RESET_VADDR)
+#define RESET_VECTOR_VADDR		XC_VADDR(RESET_VECTOR_VECOFS)
+
+#define RESET_VECTOR1_VECOFS		(XCHAL_RESET_VECTOR1_VADDR - \
+						VECBASE_RESET_VADDR)
+#define RESET_VECTOR1_VADDR		XC_VADDR(RESET_VECTOR1_VECOFS)
+
+#if XCHAL_HAVE_VECBASE
+
+#define USER_VECTOR_VADDR		XC_VADDR(XCHAL_USER_VECOFS)
+#define KERNEL_VECTOR_VADDR		XC_VADDR(XCHAL_KERNEL_VECOFS)
+#define DOUBLEEXC_VECTOR_VADDR		XC_VADDR(XCHAL_DOUBLEEXC_VECOFS)
+#define WINDOW_VECTORS_VADDR		XC_VADDR(XCHAL_WINDOW_OF4_VECOFS)
+#define INTLEVEL2_VECTOR_VADDR		XC_VADDR(XCHAL_INTLEVEL2_VECOFS)
+#define INTLEVEL3_VECTOR_VADDR		XC_VADDR(XCHAL_INTLEVEL3_VECOFS)
+#define INTLEVEL4_VECTOR_VADDR		XC_VADDR(XCHAL_INTLEVEL4_VECOFS)
+#define INTLEVEL5_VECTOR_VADDR		XC_VADDR(XCHAL_INTLEVEL5_VECOFS)
+#define INTLEVEL6_VECTOR_VADDR		XC_VADDR(XCHAL_INTLEVEL6_VECOFS)
+
+#define DEBUG_VECTOR_VADDR		XC_VADDR(XCHAL_DEBUG_VECOFS)
+
+#undef  XCHAL_NMI_VECTOR_VADDR
+#define XCHAL_NMI_VECTOR_VADDR		XC_VADDR(XCHAL_NMI_VECOFS)
+
+#undef  XCHAL_INTLEVEL7_VECTOR_VADDR
+#define XCHAL_INTLEVEL7_VECTOR_VADDR	XC_VADDR(XCHAL_INTLEVEL7_VECOFS)
+
+/*
+ * These XCHAL_* #defines from varian/core.h
+ * are not valid to use with V3 MMU. Non-XCHAL
+ * constants are defined above and should be used.
+ */
+#undef  XCHAL_VECBASE_RESET_VADDR
+#undef  XCHAL_RESET_VECTOR0_VADDR
+#undef  XCHAL_USER_VECTOR_VADDR
+#undef  XCHAL_KERNEL_VECTOR_VADDR
+#undef  XCHAL_DOUBLEEXC_VECTOR_VADDR
+#undef  XCHAL_WINDOW_VECTORS_VADDR
+#undef  XCHAL_INTLEVEL2_VECTOR_VADDR
+#undef  XCHAL_INTLEVEL3_VECTOR_VADDR
+#undef  XCHAL_INTLEVEL4_VECTOR_VADDR
+#undef  XCHAL_INTLEVEL5_VECTOR_VADDR
+#undef  XCHAL_INTLEVEL6_VECTOR_VADDR
+#undef  XCHAL_DEBUG_VECTOR_VADDR
+#undef  XCHAL_NMI_VECTOR_VADDR
+#undef  XCHAL_INTLEVEL7_VECTOR_VADDR
+
+#else
+
+#define USER_VECTOR_VADDR		XCHAL_USER_VECTOR_VADDR
+#define KERNEL_VECTOR_VADDR		XCHAL_KERNEL_VECTOR_VADDR
+#define DOUBLEEXC_VECTOR_VADDR		XCHAL_DOUBLEEXC_VECTOR_VADDR
+#define WINDOW_VECTORS_VADDR		XCHAL_WINDOW_VECTORS_VADDR
+#define INTLEVEL2_VECTOR_VADDR		XCHAL_INTLEVEL2_VECTOR_VADDR
+#define INTLEVEL3_VECTOR_VADDR		XCHAL_INTLEVEL3_VECTOR_VADDR
+#define INTLEVEL4_VECTOR_VADDR		XCHAL_INTLEVEL4_VECTOR_VADDR
+#define INTLEVEL5_VECTOR_VADDR		XCHAL_INTLEVEL5_VECTOR_VADDR
+#define INTLEVEL6_VECTOR_VADDR		XCHAL_INTLEVEL6_VECTOR_VADDR
+#define DEBUG_VECTOR_VADDR		XCHAL_DEBUG_VECTOR_VADDR
+
+#endif
+
+#endif /* _XTENSA_VECTORS_H */
diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
index c3a59d9..1e7fc87 100644
--- a/arch/xtensa/kernel/Makefile
+++ b/arch/xtensa/kernel/Makefile
@@ -4,14 +4,16 @@
 
 extra-y := head.o vmlinux.lds
 
-obj-y := align.o entry.o irq.o coprocessor.o process.o ptrace.o \
-	 setup.o signal.o syscall.o time.o traps.o vectors.o platform.o  \
-	 pci-dma.o
+obj-y := align.o coprocessor.o entry.o irq.o pci-dma.o platform.o process.o \
+	 ptrace.o setup.o signal.o stacktrace.o syscall.o time.o traps.o \
+	 vectors.o
 
 obj-$(CONFIG_KGDB) += xtensa-stub.o
 obj-$(CONFIG_PCI) += pci.o
 obj-$(CONFIG_MODULES) += xtensa_ksyms.o module.o
 
+AFLAGS_head.o += -mtext-section-literals
+
 # In the Xtensa architecture, assembly generates literals which must always
 # precede the L32R instruction with a relative offset less than 256 kB.
 # Therefore, the .text and .literal section must be combined in parenthesis
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 63845f9..5082507 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -354,16 +354,16 @@ common_exception:
 	 * so we can allow exceptions and interrupts (*) again.
 	 * Set PS(EXCM = 0, UM = 0, RING = 0, OWB = 0, WOE = 1, INTLEVEL = X)
 	 *
-	 * (*) We only allow interrupts of higher priority than current IRQ
+	 * (*) We only allow interrupts if they were previously enabled and
+	 *     we're not handling an IRQ
 	 */
 
 	rsr	a3, ps
-	addi	a0, a0, -4
-	movi	a2, 1
+	addi	a0, a0, -EXCCAUSE_LEVEL1_INTERRUPT
+	movi	a2, LOCKLEVEL
 	extui	a3, a3, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
 					# a3 = PS.INTLEVEL
-	movnez	a2, a3, a3		# a2 = 1: level-1, > 1: high priority
-	moveqz	a3, a2, a0		# a3 = IRQ level iff interrupt
+	moveqz	a3, a2, a0		# a3 = LOCKLEVEL iff interrupt
 	movi	a2, 1 << PS_WOE_BIT
 	or	a3, a3, a2
 	rsr	a0, exccause
@@ -389,6 +389,22 @@ common_exception:
 
 	save_xtregs_opt a1 a2 a4 a5 a6 a7 PT_XTREGS_OPT
 	
+#ifdef CONFIG_TRACE_IRQFLAGS
+	l32i	a4, a1, PT_DEPC
+	/* Double exception means we came here with an exception
+	 * while PS.EXCM was set, i.e. interrupts disabled.
+	 */
+	bgeui	a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
+	l32i	a4, a1, PT_EXCCAUSE
+	bnei	a4, EXCCAUSE_LEVEL1_INTERRUPT, 1f
+	/* We came here with an interrupt means interrupts were enabled
+	 * and we've just disabled them.
+	 */
+	movi	a4, trace_hardirqs_off
+	callx4	a4
+1:
+#endif
+
 	/* Go to second-level dispatcher. Set up parameters to pass to the
 	 * exception handler and call the exception handler.
 	 */
@@ -407,11 +423,29 @@ common_exception:
 	.global common_exception_return
 common_exception_return:
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+	l32i	a4, a1, PT_DEPC
+	/* Double exception means we came here with an exception
+	 * while PS.EXCM was set, i.e. interrupts disabled.
+	 */
+	bgeui	a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
+	l32i	a4, a1, PT_EXCCAUSE
+	bnei	a4, EXCCAUSE_LEVEL1_INTERRUPT, 1f
+	/* We came here with an interrupt means interrupts were enabled
+	 * and we'll reenable them on return.
+	 */
+	movi	a4, trace_hardirqs_on
+	callx4	a4
+1:
+#endif
+
 	/* Jump if we are returning from kernel exceptions. */
 
 1:	l32i	a3, a1, PT_PS
 	_bbci.l	a3, PS_UM_BIT, 4f
 
+	rsil	a2, 0
+
 	/* Specific to a user exception exit:
 	 * We need to check some flags for signal handling and rescheduling,
 	 * and have to restore WB and WS, extra states, and all registers
@@ -652,51 +686,19 @@ common_exception_exit:
 
 	l32i	a0, a1, PT_DEPC
 	l32i	a3, a1, PT_AREG3
-	_bltui	a0, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
-
-	wsr	a0, depc
 	l32i	a2, a1, PT_AREG2
-	l32i	a0, a1, PT_AREG0
-	l32i	a1, a1, PT_AREG1
-	rfde
+	_bgeui	a0, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
 
-1:
 	/* Restore a0...a3 and return */
 
-	rsr	a0, ps
-	extui	a2, a0, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
-	movi	a0, 2f
-	slli	a2, a2, 4
-	add	a0, a2, a0
-	l32i	a2, a1, PT_AREG2
-	jx	a0
-
-	.macro	irq_exit_level level
-	.align	16
-	.if	XCHAL_EXCM_LEVEL >= \level
-	l32i	a0, a1, PT_PC
-	wsr	a0, epc\level
 	l32i	a0, a1, PT_AREG0
 	l32i	a1, a1, PT_AREG1
-	rfi	\level
-	.endif
-	.endm
+	rfe
 
-	.align	16
-2:
+1: 	wsr	a0, depc
 	l32i	a0, a1, PT_AREG0
 	l32i	a1, a1, PT_AREG1
-	rfe
-
-	.align	16
-	/* no rfi for level-1 irq, handled by rfe above*/
-	nop
-
-	irq_exit_level 2
-	irq_exit_level 3
-	irq_exit_level 4
-	irq_exit_level 5
-	irq_exit_level 6
+	rfde
 
 ENDPROC(kernel_exception)
 
diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S
index df88f98..ef12c0e 100644
--- a/arch/xtensa/kernel/head.S
+++ b/arch/xtensa/kernel/head.S
@@ -48,17 +48,36 @@
 	 */
 
 	__HEAD
+	.begin	no-absolute-literals
+
 ENTRY(_start)
 
-	_j	2f
+	/* Preserve the pointer to the boot parameter list in EXCSAVE_1 */
+	wsr     a2, excsave1
+	_j	_SetupMMU
+
+	.align	4
+	.literal_position
+.Lstartup:
+	.word	_startup
+
 	.align	4
-1:	.word	_startup
-2:	l32r	a0, 1b
+	.global _SetupMMU
+_SetupMMU:
+	Offset = _SetupMMU - _start
+
+#ifdef CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
+	initialize_mmu
+#endif
+	.end	no-absolute-literals
+
+	l32r	a0, .Lstartup
 	jx	a0
 
 ENDPROC(_start)
 
-	.section .init.text, "ax"
+	__INIT
+	.literal_position
 
 ENTRY(_startup)
 
@@ -67,10 +86,6 @@ ENTRY(_startup)
 	movi	a0, LOCKLEVEL
 	wsr	a0, ps
 
-	/* Preserve the pointer to the boot parameter list in EXCSAVE_1 */
-
-	wsr	a2, excsave1
-
 	/* Start with a fresh windowbase and windowstart.  */
 
 	movi	a1, 1
@@ -86,7 +101,9 @@ ENTRY(_startup)
 	/* Clear debugging registers. */
 
 #if XCHAL_HAVE_DEBUG
+#if XCHAL_NUM_IBREAK > 0
 	wsr	a0, ibreakenable
+#endif
 	wsr	a0, icount
 	movi	a1, 15
 	wsr	a0, icountlevel
@@ -156,8 +173,6 @@ ENTRY(_startup)
 
 	isync
 
-	initialize_mmu
-
 	/* Unpack data sections
 	 *
 	 * The linker script used to build the Linux kernel image
@@ -205,6 +220,10 @@ ENTRY(_startup)
 
 	___flush_dcache_all a2 a3
 #endif
+	memw
+	isync
+	___invalidate_icache_all a2 a3
+	isync
 
 	/* Setup stack and enable window exceptions (keep irqs disabled) */
 
diff --git a/arch/xtensa/kernel/platform.c b/arch/xtensa/kernel/platform.c
index 44bf21c..2bd6c35 100644
--- a/arch/xtensa/kernel/platform.c
+++ b/arch/xtensa/kernel/platform.c
@@ -36,6 +36,7 @@ _F(void, power_off, (void), { while(1); });
 _F(void, idle, (void), { __asm__ __volatile__ ("waiti 0" ::: "memory"); });
 _F(void, heartbeat, (void), { });
 _F(int,  pcibios_fixup, (void), { return 0; });
+_F(void, pcibios_init, (void), { });
 
 #ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT
 _F(void, calibrate_ccount, (void),
diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c
new file mode 100644
index 0000000..7d2c317
--- /dev/null
+++ b/arch/xtensa/kernel/stacktrace.c
@@ -0,0 +1,120 @@
+/*
+ * arch/xtensa/kernel/stacktrace.c
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2001 - 2013 Tensilica Inc.
+ */
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+
+#include <asm/stacktrace.h>
+#include <asm/traps.h>
+
+void walk_stackframe(unsigned long *sp,
+		int (*fn)(struct stackframe *frame, void *data),
+		void *data)
+{
+	unsigned long a0, a1;
+	unsigned long sp_end;
+
+	a1 = (unsigned long)sp;
+	sp_end = ALIGN(a1, THREAD_SIZE);
+
+	spill_registers();
+
+	while (a1 < sp_end) {
+		struct stackframe frame;
+
+		sp = (unsigned long *)a1;
+
+		a0 = *(sp - 4);
+		a1 = *(sp - 3);
+
+		if (a1 <= (unsigned long)sp)
+			break;
+
+		frame.pc = MAKE_PC_FROM_RA(a0, a1);
+		frame.sp = a1;
+
+		if (fn(&frame, data))
+			return;
+	}
+}
+
+#ifdef CONFIG_STACKTRACE
+
+struct stack_trace_data {
+	struct stack_trace *trace;
+	unsigned skip;
+};
+
+static int stack_trace_cb(struct stackframe *frame, void *data)
+{
+	struct stack_trace_data *trace_data = data;
+	struct stack_trace *trace = trace_data->trace;
+
+	if (trace_data->skip) {
+		--trace_data->skip;
+		return 0;
+	}
+	if (!kernel_text_address(frame->pc))
+		return 0;
+
+	trace->entries[trace->nr_entries++] = frame->pc;
+	return trace->nr_entries >= trace->max_entries;
+}
+
+void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
+{
+	struct stack_trace_data trace_data = {
+		.trace = trace,
+		.skip = trace->skip,
+	};
+	walk_stackframe(stack_pointer(task), stack_trace_cb, &trace_data);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+
+void save_stack_trace(struct stack_trace *trace)
+{
+	save_stack_trace_tsk(current, trace);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+
+#endif
+
+#ifdef CONFIG_FRAME_POINTER
+
+struct return_addr_data {
+	unsigned long addr;
+	unsigned skip;
+};
+
+static int return_address_cb(struct stackframe *frame, void *data)
+{
+	struct return_addr_data *r = data;
+
+	if (r->skip) {
+		--r->skip;
+		return 0;
+	}
+	if (!kernel_text_address(frame->pc))
+		return 0;
+	r->addr = frame->pc;
+	return 1;
+}
+
+unsigned long return_address(unsigned level)
+{
+	struct return_addr_data r = {
+		.skip = level + 1,
+	};
+	walk_stackframe(stack_pointer(NULL), return_address_cb, &r);
+	return r.addr;
+}
+EXPORT_SYMBOL(return_address);
+
+#endif
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 458186d..3e8a05c 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -11,7 +11,7 @@
  *
  * Essentially rewritten for the Xtensa architecture port.
  *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
+ * Copyright (C) 2001 - 2013 Tensilica Inc.
  *
  * Joe Taylor	<joe@tensilica.com, joetylr@yahoo.com>
  * Chris Zankel	<chris@zankel.net>
@@ -32,6 +32,7 @@
 #include <linux/delay.h>
 #include <linux/hardirq.h>
 
+#include <asm/stacktrace.h>
 #include <asm/ptrace.h>
 #include <asm/timex.h>
 #include <asm/uaccess.h>
@@ -195,7 +196,6 @@ void do_multihit(struct pt_regs *regs, unsigned long exccause)
 
 /*
  * IRQ handler.
- * PS.INTLEVEL is the current IRQ priority level.
  */
 
 extern void do_IRQ(int, struct pt_regs *);
@@ -212,18 +212,21 @@ void do_interrupt(struct pt_regs *regs)
 		XCHAL_INTLEVEL6_MASK,
 		XCHAL_INTLEVEL7_MASK,
 	};
-	unsigned level = get_sr(ps) & PS_INTLEVEL_MASK;
-
-	if (WARN_ON_ONCE(level >= ARRAY_SIZE(int_level_mask)))
-		return;
 
 	for (;;) {
 		unsigned intread = get_sr(interrupt);
 		unsigned intenable = get_sr(intenable);
-		unsigned int_at_level = intread & intenable &
-			int_level_mask[level];
+		unsigned int_at_level = intread & intenable;
+		unsigned level;
+
+		for (level = LOCKLEVEL; level > 0; --level) {
+			if (int_at_level & int_level_mask[level]) {
+				int_at_level &= int_level_mask[level];
+				break;
+			}
+		}
 
-		if (!int_at_level)
+		if (level == 0)
 			return;
 
 		/*
@@ -404,53 +407,25 @@ void show_regs(struct pt_regs * regs)
 		       regs->syscall);
 }
 
-static __always_inline unsigned long *stack_pointer(struct task_struct *task)
+static int show_trace_cb(struct stackframe *frame, void *data)
 {
-	unsigned long *sp;
-
-	if (!task || task == current)
-		__asm__ __volatile__ ("mov %0, a1\n" : "=a"(sp));
-	else
-		sp = (unsigned long *)task->thread.sp;
-
-	return sp;
+	if (kernel_text_address(frame->pc)) {
+		printk(" [<%08lx>] ", frame->pc);
+		print_symbol("%s\n", frame->pc);
+	}
+	return 0;
 }
 
 void show_trace(struct task_struct *task, unsigned long *sp)
 {
-	unsigned long a0, a1, pc;
-	unsigned long sp_start, sp_end;
-
-	if (sp)
-		a1 = (unsigned long)sp;
-	else
-		a1 = (unsigned long)stack_pointer(task);
-
-	sp_start = a1 & ~(THREAD_SIZE-1);
-	sp_end = sp_start + THREAD_SIZE;
+	if (!sp)
+		sp = stack_pointer(task);
 
 	printk("Call Trace:");
 #ifdef CONFIG_KALLSYMS
 	printk("\n");
 #endif
-	spill_registers();
-
-	while (a1 > sp_start && a1 < sp_end) {
-		sp = (unsigned long*)a1;
-
-		a0 = *(sp - 4);
-		a1 = *(sp - 3);
-
-		if (a1 <= (unsigned long) sp)
-			break;
-
-		pc = MAKE_PC_FROM_RA(a0, a1);
-
-		if (kernel_text_address(pc)) {
-			printk(" [<%08lx>] ", pc);
-			print_symbol("%s\n", pc);
-		}
-	}
+	walk_stackframe(sp, show_trace_cb, NULL);
 	printk("\n");
 }
 
diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S
index 82109b42..f9e1753 100644
--- a/arch/xtensa/kernel/vectors.S
+++ b/arch/xtensa/kernel/vectors.S
@@ -50,6 +50,7 @@
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/thread_info.h>
+#include <asm/vectors.h>
 
 #define WINDOW_VECTORS_SIZE   0x180
 
@@ -220,7 +221,7 @@ ENTRY(_DoubleExceptionVector)
 
 	xsr	a0, depc		# get DEPC, save a0
 
-	movi	a3, XCHAL_WINDOW_VECTORS_VADDR
+	movi	a3, WINDOW_VECTORS_VADDR
 	_bltu	a0, a3, .Lfixup
 	addi	a3, a3, WINDOW_VECTORS_SIZE
 	_bgeu	a0, a3, .Lfixup
@@ -385,9 +386,12 @@ ENDPROC(_DebugInterruptVector)
 	.if	XCHAL_EXCM_LEVEL >= \level
 	.section .Level\level\()InterruptVector.text, "ax"
 ENTRY(_Level\level\()InterruptVector)
-	wsr	a0, epc1
+	wsr	a0, excsave2
 	rsr	a0, epc\level
-	xsr	a0, epc1
+	wsr	a0, epc1
+	movi	a0, EXCCAUSE_LEVEL1_INTERRUPT
+	wsr	a0, exccause
+	rsr	a0, eps\level
 					# branch to user or kernel vector
 	j	_SimulateUserKernelVectorException
 	.endif
@@ -439,10 +443,8 @@ ENDPROC(_WindowOverflow4)
 	 */
 	.align 4
 _SimulateUserKernelVectorException:
-	wsr	a0, excsave2
-	movi	a0, 4			# LEVEL1_INTERRUPT cause
-	wsr	a0, exccause
-	rsr	a0, ps
+	addi	a0, a0, (1 << PS_EXCM_BIT)
+	wsr	a0, ps
 	bbsi.l	a0, PS_UM_BIT, 1f	# branch if user mode
 	rsr	a0, excsave2		# restore a0
 	j	_KernelExceptionVector	# simulate kernel vector exception
diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S
index 1469524..21acd11 100644
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -18,6 +18,7 @@
 #include <asm/page.h>
 #include <asm/thread_info.h>
 
+#include <asm/vectors.h>
 #include <variant/core.h>
 #include <platform/hardware.h>
 OUTPUT_ARCH(xtensa)
@@ -30,7 +31,7 @@ jiffies = jiffies_64;
 #endif
 
 #ifndef KERNELOFFSET
-#define KERNELOFFSET 0xd0001000
+#define KERNELOFFSET 0xd0003000
 #endif
 
 /* Note: In the following macros, it would be nice to specify only the
@@ -185,16 +186,16 @@ SECTIONS
 
   SECTION_VECTOR (_WindowVectors_text,
 		  .WindowVectors.text,
-		  XCHAL_WINDOW_VECTORS_VADDR, 4,
+		  WINDOW_VECTORS_VADDR, 4,
 		  .dummy)
   SECTION_VECTOR (_DebugInterruptVector_literal,
 		  .DebugInterruptVector.literal,
-		  XCHAL_DEBUG_VECTOR_VADDR - 4,
+		  DEBUG_VECTOR_VADDR - 4,
 		  SIZEOF(.WindowVectors.text),
 		  .WindowVectors.text)
   SECTION_VECTOR (_DebugInterruptVector_text,
 		  .DebugInterruptVector.text,
-		  XCHAL_DEBUG_VECTOR_VADDR,
+		  DEBUG_VECTOR_VADDR,
 		  4,
 		  .DebugInterruptVector.literal)
 #undef LAST
@@ -202,7 +203,7 @@ SECTIONS
 #if XCHAL_EXCM_LEVEL >= 2
   SECTION_VECTOR (_Level2InterruptVector_text,
 		  .Level2InterruptVector.text,
-		  XCHAL_INTLEVEL2_VECTOR_VADDR,
+		  INTLEVEL2_VECTOR_VADDR,
 		  SIZEOF(LAST), LAST)
 # undef LAST
 # define LAST	.Level2InterruptVector.text
@@ -210,7 +211,7 @@ SECTIONS
 #if XCHAL_EXCM_LEVEL >= 3
   SECTION_VECTOR (_Level3InterruptVector_text,
 		  .Level3InterruptVector.text,
-		  XCHAL_INTLEVEL3_VECTOR_VADDR,
+		  INTLEVEL3_VECTOR_VADDR,
 		  SIZEOF(LAST), LAST)
 # undef LAST
 # define LAST	.Level3InterruptVector.text
@@ -218,7 +219,7 @@ SECTIONS
 #if XCHAL_EXCM_LEVEL >= 4
   SECTION_VECTOR (_Level4InterruptVector_text,
 		  .Level4InterruptVector.text,
-		  XCHAL_INTLEVEL4_VECTOR_VADDR,
+		  INTLEVEL4_VECTOR_VADDR,
 		  SIZEOF(LAST), LAST)
 # undef LAST
 # define LAST	.Level4InterruptVector.text
@@ -226,7 +227,7 @@ SECTIONS
 #if XCHAL_EXCM_LEVEL >= 5
   SECTION_VECTOR (_Level5InterruptVector_text,
 		  .Level5InterruptVector.text,
-		  XCHAL_INTLEVEL5_VECTOR_VADDR,
+		  INTLEVEL5_VECTOR_VADDR,
 		  SIZEOF(LAST), LAST)
 # undef LAST
 # define LAST	.Level5InterruptVector.text
@@ -234,39 +235,39 @@ SECTIONS
 #if XCHAL_EXCM_LEVEL >= 6
   SECTION_VECTOR (_Level6InterruptVector_text,
 		  .Level6InterruptVector.text,
-		  XCHAL_INTLEVEL6_VECTOR_VADDR,
+		  INTLEVEL6_VECTOR_VADDR,
 		  SIZEOF(LAST), LAST)
 # undef LAST
 # define LAST	.Level6InterruptVector.text
 #endif
   SECTION_VECTOR (_KernelExceptionVector_literal,
 		  .KernelExceptionVector.literal,
-		  XCHAL_KERNEL_VECTOR_VADDR - 4,
+		  KERNEL_VECTOR_VADDR - 4,
 		  SIZEOF(LAST), LAST)
 #undef LAST
   SECTION_VECTOR (_KernelExceptionVector_text,
 		  .KernelExceptionVector.text,
-		  XCHAL_KERNEL_VECTOR_VADDR,
+		  KERNEL_VECTOR_VADDR,
 		  4,
 		  .KernelExceptionVector.literal)
   SECTION_VECTOR (_UserExceptionVector_literal,
 		  .UserExceptionVector.literal,
-		  XCHAL_USER_VECTOR_VADDR - 4,
+		  USER_VECTOR_VADDR - 4,
 		  SIZEOF(.KernelExceptionVector.text),
 		  .KernelExceptionVector.text)
   SECTION_VECTOR (_UserExceptionVector_text,
 		  .UserExceptionVector.text,
-		  XCHAL_USER_VECTOR_VADDR,
+		  USER_VECTOR_VADDR,
 		  4,
 		  .UserExceptionVector.literal)
   SECTION_VECTOR (_DoubleExceptionVector_literal,
 		  .DoubleExceptionVector.literal,
-		  XCHAL_DOUBLEEXC_VECTOR_VADDR - 16,
+		  DOUBLEEXC_VECTOR_VADDR - 16,
 		  SIZEOF(.UserExceptionVector.text),
 		  .UserExceptionVector.text)
   SECTION_VECTOR (_DoubleExceptionVector_text,
 		  .DoubleExceptionVector.text,
-		  XCHAL_DOUBLEEXC_VECTOR_VADDR,
+		  DOUBLEEXC_VECTOR_VADDR,
 		  32,
 		  .DoubleExceptionVector.literal)
 
@@ -284,11 +285,26 @@ SECTIONS
   . = ALIGN(0x10);
   .bootstrap : { *(.bootstrap.literal .bootstrap.text .bootstrap.data) }
 
-  .ResetVector.text XCHAL_RESET_VECTOR_VADDR :
+  .ResetVector.text RESET_VECTOR_VADDR :
   {
     *(.ResetVector.text)
   }
 
+
+  /*
+   * This is a remapped copy of the Secondary Reset Vector Code.
+   * It keeps gdb in sync with the PC after switching
+   * to the temporary mapping used while setting up
+   * the V2 MMU mappings for Linux.
+   *
+   * Only debug information about this section is put in the kernel image.
+   */
+  .SecondaryResetVector.remapped_text 0x46000000 (INFO):
+  {
+	*(.SecondaryResetVector.remapped_text)
+  }
+
+
   .xt.lit : { *(.xt.lit) }
   .xt.prop : { *(.xt.prop) }
 
diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c
index afe058b..42c53c87 100644
--- a/arch/xtensa/kernel/xtensa_ksyms.c
+++ b/arch/xtensa/kernel/xtensa_ksyms.c
@@ -119,3 +119,8 @@ EXPORT_SYMBOL(outsl);
 EXPORT_SYMBOL(insb);
 EXPORT_SYMBOL(insw);
 EXPORT_SYMBOL(insl);
+
+extern long common_exception_return;
+extern long _spill_registers;
+EXPORT_SYMBOL(common_exception_return);
+EXPORT_SYMBOL(_spill_registers);
diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c
index 0f77f9d..a107757 100644
--- a/arch/xtensa/mm/mmu.c
+++ b/arch/xtensa/mm/mmu.c
@@ -24,15 +24,19 @@ void __init paging_init(void)
  */
 void __init init_mmu(void)
 {
-	/* Writing zeros to the <t>TLBCFG special registers ensure
-	 * that valid values exist in the register.  For existing
-	 * PGSZID<w> fields, zero selects the first element of the
-	 * page-size array.  For nonexistent PGSZID<w> fields, zero is
-	 * the best value to write.  Also, when changing PGSZID<w>
+#if !(XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY)
+	/*
+	 * Writing zeros to the instruction and data TLBCFG special
+	 * registers ensure that valid values exist in the register.
+	 *
+	 * For existing PGSZID<w> fields, zero selects the first element
+	 * of the page-size array.  For nonexistent PGSZID<w> fields,
+	 * zero is the best value to write.  Also, when changing PGSZID<w>
 	 * fields, the corresponding TLB must be flushed.
 	 */
 	set_itlbcfg_register(0);
 	set_dtlbcfg_register(0);
+#endif
 	flush_tlb_all();
 
 	/* Set rasid register to a known value. */
diff --git a/arch/xtensa/oprofile/backtrace.c b/arch/xtensa/oprofile/backtrace.c
index 66f32ee..5f03a59 100644
--- a/arch/xtensa/oprofile/backtrace.c
+++ b/arch/xtensa/oprofile/backtrace.c
@@ -132,9 +132,7 @@ static void xtensa_backtrace_kernel(struct pt_regs *regs, unsigned int depth)
 		pc = MAKE_PC_FROM_RA(a0, pc);
 
 		/* Add the PC to the trace. */
-		if (kernel_text_address(pc))
-			oprofile_add_trace(pc);
-
+		oprofile_add_trace(pc);
 		if (pc == (unsigned long) &common_exception_return) {
 			regs = (struct pt_regs *)a1;
 			if (user_mode(regs)) {
diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c
index da9866f..70cb408 100644
--- a/arch/xtensa/platforms/iss/console.c
+++ b/arch/xtensa/platforms/iss/console.c
@@ -56,13 +56,13 @@ static void rs_poll(unsigned long);
 static int rs_open(struct tty_struct *tty, struct file * filp)
 {
 	tty->port = &serial_port;
-	spin_lock(&timer_lock);
+	spin_lock_bh(&timer_lock);
 	if (tty->count == 1) {
 		setup_timer(&serial_timer, rs_poll,
 				(unsigned long)&serial_port);
 		mod_timer(&serial_timer, jiffies + SERIAL_TIMER_VALUE);
 	}
-	spin_unlock(&timer_lock);
+	spin_unlock_bh(&timer_lock);
 
 	return 0;
 }
@@ -99,14 +99,13 @@ static int rs_write(struct tty_struct * tty,
 static void rs_poll(unsigned long priv)
 {
 	struct tty_port *port = (struct tty_port *)priv;
-	struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
 	int i = 0;
 	unsigned char c;
 
 	spin_lock(&timer_lock);
 
-	while (__simc(SYS_select_one, 0, XTISS_SELECT_ONE_READ, (int)&tv,0,0)){
-		__simc (SYS_read, 0, (unsigned long)&c, 1, 0, 0);
+	while (simc_poll(0)) {
+		simc_read(0, &c, 1);
 		tty_insert_flip_char(port, c, TTY_NORMAL);
 		i++;
 	}
@@ -244,8 +243,7 @@ static void iss_console_write(struct console *co, const char *s, unsigned count)
 	int len = strlen(s);
 
 	if (s != 0 && *s != 0)
-		__simc (SYS_write, 1, (unsigned long)s,
-			count < len ? count : len,0,0);
+		simc_write(1, s, count < len ? count : len);
 }
 
 static struct tty_driver* iss_console_device(struct console *c, int *index)
diff --git a/arch/xtensa/platforms/iss/include/platform/simcall.h b/arch/xtensa/platforms/iss/include/platform/simcall.h
index b5a4edf..12b15ad 100644
--- a/arch/xtensa/platforms/iss/include/platform/simcall.h
+++ b/arch/xtensa/platforms/iss/include/platform/simcall.h
@@ -59,56 +59,58 @@
 
 static int errno;
 
-static inline int __simc(int a, int b, int c, int d, int e, int f)
+static inline int __simc(int a, int b, int c, int d)
 {
 	int ret;
 	register int a1 asm("a2") = a;
 	register int b1 asm("a3") = b;
 	register int c1 asm("a4") = c;
 	register int d1 asm("a5") = d;
-	register int e1 asm("a6") = e;
-	register int f1 asm("a7") = f;
 	__asm__ __volatile__ (
 			"simcall\n"
 			"mov %0, a2\n"
 			"mov %1, a3\n"
 			: "=a" (ret), "=a" (errno), "+r"(a1), "+r"(b1)
-			: "r"(c1), "r"(d1), "r"(e1), "r"(f1)
+			: "r"(c1), "r"(d1)
 			: "memory");
 	return ret;
 }
 
 static inline int simc_open(const char *file, int flags, int mode)
 {
-	return __simc(SYS_open, (int) file, flags, mode, 0, 0);
+	return __simc(SYS_open, (int) file, flags, mode);
 }
 
 static inline int simc_close(int fd)
 {
-	return __simc(SYS_close, fd, 0, 0, 0, 0);
+	return __simc(SYS_close, fd, 0, 0);
 }
 
 static inline int simc_ioctl(int fd, int request, void *arg)
 {
-	return __simc(SYS_ioctl, fd, request, (int) arg, 0, 0);
+	return __simc(SYS_ioctl, fd, request, (int) arg);
 }
 
 static inline int simc_read(int fd, void *buf, size_t count)
 {
-	return __simc(SYS_read, fd, (int) buf, count, 0, 0);
+	return __simc(SYS_read, fd, (int) buf, count);
 }
 
 static inline int simc_write(int fd, const void *buf, size_t count)
 {
-	return __simc(SYS_write, fd, (int) buf, count, 0, 0);
+	return __simc(SYS_write, fd, (int) buf, count);
 }
 
 static inline int simc_poll(int fd)
 {
 	struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
 
-	return __simc(SYS_select_one, fd, XTISS_SELECT_ONE_READ, (int)&tv,
-			0, 0);
+	return __simc(SYS_select_one, fd, XTISS_SELECT_ONE_READ, (int)&tv);
+}
+
+static inline int simc_lseek(int fd, uint32_t off, int whence)
+{
+	return __simc(SYS_lseek, fd, off, whence);
 }
 
 #endif /* _XTENSA_PLATFORM_ISS_SIMCALL_H */
diff --git a/arch/xtensa/platforms/iss/setup.c b/arch/xtensa/platforms/iss/setup.c
index e170010..da7d182 100644
--- a/arch/xtensa/platforms/iss/setup.c
+++ b/arch/xtensa/platforms/iss/setup.c
@@ -38,12 +38,6 @@ void __init platform_init(bp_tag_t* bootparam)
 
 }
 
-#ifdef CONFIG_PCI
-void platform_pcibios_init(void)
-{
-}
-#endif
-
 void platform_halt(void)
 {
 	pr_info(" ** Called platform_halt() **\n");
@@ -64,7 +58,9 @@ void platform_restart(void)
 			     "wsr	a2, icountlevel\n\t"
 			     "movi	a2, 0\n\t"
 			     "wsr	a2, icount\n\t"
+#if XCHAL_NUM_IBREAK > 0
 			     "wsr	a2, ibreakenable\n\t"
+#endif
 			     "wsr	a2, lcount\n\t"
 			     "movi	a2, 0x1f\n\t"
 			     "wsr	a2, ps\n\t"
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 0345f43..c0edb35 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -85,7 +85,7 @@ static void simdisk_transfer(struct simdisk *dev, unsigned long sector,
 	while (nbytes > 0) {
 		unsigned long io;
 
-		__simc(SYS_lseek, dev->fd, offset, SEEK_SET, 0, 0);
+		simc_lseek(dev->fd, offset, SEEK_SET);
 		if (write)
 			io = simc_write(dev->fd, buffer, nbytes);
 		else
@@ -176,7 +176,7 @@ static int simdisk_attach(struct simdisk *dev, const char *filename)
 		err = -ENODEV;
 		goto out;
 	}
-	dev->size = __simc(SYS_lseek, dev->fd, 0, SEEK_END, 0, 0);
+	dev->size = simc_lseek(dev->fd, 0, SEEK_END);
 	set_capacity(dev->gd, dev->size >> SECTOR_SHIFT);
 	dev->filename = filename;
 	pr_info("SIMDISK: %s=%s\n", dev->gd->disk_name, dev->filename);
@@ -217,7 +217,7 @@ static ssize_t proc_read_simdisk(struct file *file, char __user *buf,
 			size_t size, loff_t *ppos)
 {
 	struct simdisk *dev = PDE_DATA(file_inode(file));
-	char *s = dev->filename;
+	const char *s = dev->filename;
 	if (s) {
 		ssize_t n = simple_read_from_buffer(buf, size, ppos,
 							s, strlen(s));
@@ -238,7 +238,7 @@ static ssize_t proc_write_simdisk(struct file *file, const char __user *buf,
 
 	if (tmp == NULL)
 		return -ENOMEM;
-	if (copy_from_user(tmp, buffer, count)) {
+	if (copy_from_user(tmp, buf, count)) {
 		err = -EFAULT;
 		goto out_free;
 	}
diff --git a/arch/xtensa/platforms/xt2000/setup.c b/arch/xtensa/platforms/xt2000/setup.c
index c7d90f1..f9bc879 100644
--- a/arch/xtensa/platforms/xt2000/setup.c
+++ b/arch/xtensa/platforms/xt2000/setup.c
@@ -69,7 +69,9 @@ void platform_restart(void)
 			      "wsr	a2, icountlevel\n\t"
 			      "movi	a2, 0\n\t"
 			      "wsr	a2, icount\n\t"
+#if XCHAL_NUM_IBREAK > 0
 			      "wsr	a2, ibreakenable\n\t"
+#endif
 			      "wsr	a2, lcount\n\t"
 			      "movi	a2, 0x1f\n\t"
 			      "wsr	a2, ps\n\t"
diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c
index 9d888a2..96ef8ee 100644
--- a/arch/xtensa/platforms/xtfpga/setup.c
+++ b/arch/xtensa/platforms/xtfpga/setup.c
@@ -60,7 +60,9 @@ void platform_restart(void)
 			      "wsr	a2, icountlevel\n\t"
 			      "movi	a2, 0\n\t"
 			      "wsr	a2, icount\n\t"
+#if XCHAL_NUM_IBREAK > 0
 			      "wsr	a2, ibreakenable\n\t"
+#endif
 			      "wsr	a2, lcount\n\t"
 			      "movi	a2, 0x1f\n\t"
 			      "wsr	a2, ps\n\t"
diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c
index 66e025a..f3c2d03 100644
--- a/drivers/net/ethernet/tile/tilegx.c
+++ b/drivers/net/ethernet/tile/tilegx.c
@@ -930,7 +930,7 @@ static int tile_net_setup_interrupts(struct net_device *dev)
 		if (info->has_iqueue) {
 			gxio_mpipe_request_notif_ring_interrupt(
 				&context, cpu_x(cpu), cpu_y(cpu),
-				1, ingress_irq, info->iqueue.ring);
+				KERNEL_PL, ingress_irq, info->iqueue.ring);
 		}
 	}
 
diff --git a/drivers/rtc/rtc-tile.c b/drivers/rtc/rtc-tile.c
index 249b653..fc3dee9 100644
--- a/drivers/rtc/rtc-tile.c
+++ b/drivers/rtc/rtc-tile.c
@@ -146,6 +146,7 @@ exit_driver_unregister:
  */
 static void __exit tile_rtc_driver_exit(void)
 {
+	platform_device_unregister(tile_rtc_platform_device);
 	platform_driver_unregister(&tile_rtc_platform_driver);
 }
 
diff --git a/drivers/usb/host/ehci-tilegx.c b/drivers/usb/host/ehci-tilegx.c
index 1d215cd..b083a35 100644
--- a/drivers/usb/host/ehci-tilegx.c
+++ b/drivers/usb/host/ehci-tilegx.c
@@ -118,8 +118,10 @@ static int ehci_hcd_tilegx_drv_probe(struct platform_device *pdev)
 
 	hcd = usb_create_hcd(&ehci_tilegx_hc_driver, &pdev->dev,
 			     dev_name(&pdev->dev));
-	if (!hcd)
-		return -ENOMEM;
+	if (!hcd) {
+          ret = -ENOMEM;
+          goto err_hcd;
+        }
 
 	/*
 	 * We don't use rsrc_start to map in our registers, but seems like
@@ -176,6 +178,7 @@ err_have_irq:
 err_no_irq:
 	tilegx_stop_ehc();
 	usb_put_hcd(hcd);
+err_hcd:
 	gxio_usb_host_destroy(&pdata->usb_ctx);
 	return ret;
 }
diff --git a/drivers/usb/host/ohci-tilegx.c b/drivers/usb/host/ohci-tilegx.c
index 1ae7b28..ea73009 100644
--- a/drivers/usb/host/ohci-tilegx.c
+++ b/drivers/usb/host/ohci-tilegx.c
@@ -112,8 +112,10 @@ static int ohci_hcd_tilegx_drv_probe(struct platform_device *pdev)
 
 	hcd = usb_create_hcd(&ohci_tilegx_hc_driver, &pdev->dev,
 			     dev_name(&pdev->dev));
-	if (!hcd)
-		return -ENOMEM;
+	if (!hcd) {
+		ret = -ENOMEM;
+		goto err_hcd;
+	}
 
 	/*
 	 * We don't use rsrc_start to map in our registers, but seems like
@@ -165,6 +167,7 @@ err_have_irq:
 err_no_irq:
 	tilegx_stop_ohc();
 	usb_put_hcd(hcd);
+err_hcd:
 	gxio_usb_host_destroy(&pdata->usb_ctx);
 	return ret;
 }
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 9a8622a..2b3b832 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -1,5 +1,5 @@
 config BTRFS_FS
-	tristate "Btrfs filesystem Unstable disk format"
+	tristate "Btrfs filesystem support"
 	select LIBCRC32C
 	select ZLIB_INFLATE
 	select ZLIB_DEFLATE
@@ -52,3 +52,23 @@ config BTRFS_FS_CHECK_INTEGRITY
 	  In most cases, unless you are a btrfs developer who needs
 	  to verify the integrity of (super)-block write requests
 	  during the run of a regression test, say N
+
+config BTRFS_FS_RUN_SANITY_TESTS
+	bool "Btrfs will run sanity tests upon loading"
+	depends on BTRFS_FS
+	help
+	  This will run some basic sanity tests on the free space cache
+	  code to make sure it is acting as it should.  These are mostly
+	  regression tests and are only really interesting to btrfs devlopers.
+
+	  If unsure, say N.
+
+config BTRFS_DEBUG
+	bool "Btrfs debugging support"
+	depends on BTRFS_FS
+	help
+	  Enable run-time debugging support for the btrfs filesystem. This may
+	  enable additional and expensive checks with negative impact on
+	  performance, or export extra information via sysfs.
+
+	  If unsure, say N.
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index bd605c8..b4fb415 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -352,6 +352,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 		err = __resolve_indirect_ref(fs_info, search_commit_root,
 					     time_seq, ref, parents,
 					     extent_item_pos);
+		if (err == -ENOMEM)
+			goto out;
 		if (err)
 			continue;
 
@@ -367,7 +369,7 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 			new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS);
 			if (!new_ref) {
 				ret = -ENOMEM;
-				break;
+				goto out;
 			}
 			memcpy(new_ref, ref, sizeof(*ref));
 			new_ref->parent = node->val;
@@ -377,7 +379,7 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 		}
 		ulist_reinit(parents);
 	}
-
+out:
 	ulist_free(parents);
 	return ret;
 }
@@ -421,7 +423,10 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
 		BUG_ON(!ref->wanted_disk_byte);
 		eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
 				     fs_info->tree_root->leafsize, 0);
-		BUG_ON(!eb);
+		if (!eb || !extent_buffer_uptodate(eb)) {
+			free_extent_buffer(eb);
+			return -EIO;
+		}
 		btrfs_tree_read_lock(eb);
 		if (btrfs_header_level(eb) == 0)
 			btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
@@ -443,7 +448,7 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
  *           having a parent).
  * mode = 2: merge identical parents
  */
-static int __merge_refs(struct list_head *head, int mode)
+static void __merge_refs(struct list_head *head, int mode)
 {
 	struct list_head *pos1;
 
@@ -489,7 +494,6 @@ static int __merge_refs(struct list_head *head, int mode)
 		}
 
 	}
-	return 0;
 }
 
 /*
@@ -582,7 +586,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
 		default:
 			WARN_ON(1);
 		}
-		BUG_ON(ret);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
@@ -680,7 +685,8 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
 		default:
 			WARN_ON(1);
 		}
-		BUG_ON(ret);
+		if (ret)
+			return ret;
 		ptr += btrfs_extent_inline_ref_size(type);
 	}
 
@@ -762,7 +768,9 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
 		default:
 			WARN_ON(1);
 		}
-		BUG_ON(ret);
+		if (ret)
+			return ret;
+
 	}
 
 	return ret;
@@ -880,18 +888,14 @@ again:
 	if (ret)
 		goto out;
 
-	ret = __merge_refs(&prefs, 1);
-	if (ret)
-		goto out;
+	__merge_refs(&prefs, 1);
 
 	ret = __resolve_indirect_refs(fs_info, search_commit_root, time_seq,
 				      &prefs, extent_item_pos);
 	if (ret)
 		goto out;
 
-	ret = __merge_refs(&prefs, 2);
-	if (ret)
-		goto out;
+	__merge_refs(&prefs, 2);
 
 	while (!list_empty(&prefs)) {
 		ref = list_first_entry(&prefs, struct __prelim_ref, list);
@@ -900,7 +904,8 @@ again:
 		if (ref->count && ref->root_id && ref->parent == 0) {
 			/* no parent == root of tree */
 			ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
-			BUG_ON(ret < 0);
+			if (ret < 0)
+				goto out;
 		}
 		if (ref->count && ref->parent) {
 			struct extent_inode_elem *eie = NULL;
@@ -911,7 +916,10 @@ again:
 							info_level);
 				eb = read_tree_block(fs_info->extent_root,
 							   ref->parent, bsz, 0);
-				BUG_ON(!eb);
+				if (!eb || !extent_buffer_uptodate(eb)) {
+					free_extent_buffer(eb);
+					return -EIO;
+				}
 				ret = find_extent_in_eb(eb, bytenr,
 							*extent_item_pos, &eie);
 				ref->inode_list = eie;
@@ -920,6 +928,8 @@ again:
 			ret = ulist_add_merge(refs, ref->parent,
 					      (uintptr_t)ref->inode_list,
 					      (u64 *)&eie, GFP_NOFS);
+			if (ret < 0)
+				goto out;
 			if (!ret && extent_item_pos) {
 				/*
 				 * we've recorded that parent, so we must extend
@@ -930,7 +940,6 @@ again:
 					eie = eie->next;
 				eie->next = ref->inode_list;
 			}
-			BUG_ON(ret < 0);
 		}
 		kfree(ref);
 	}
@@ -1180,6 +1189,20 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
 	return ret;
 }
 
+/*
+ * this iterates to turn a name (from iref/extref) into a full filesystem path.
+ * Elements of the path are separated by '/' and the path is guaranteed to be
+ * 0-terminated. the path is only given within the current file system.
+ * Therefore, it never starts with a '/'. the caller is responsible to provide
+ * "size" bytes in "dest". the dest buffer will be filled backwards. finally,
+ * the start point of the resulting string is returned. this pointer is within
+ * dest, normally.
+ * in case the path buffer would overflow, the pointer is decremented further
+ * as if output was written to the buffer, though no more output is actually
+ * generated. that way, the caller can determine how much space would be
+ * required for the path to fit into the buffer. in that case, the returned
+ * value will be smaller than dest. callers must check this!
+ */
 char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 			u32 name_len, unsigned long name_off,
 			struct extent_buffer *eb_in, u64 parent,
@@ -1249,32 +1272,6 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 }
 
 /*
- * this iterates to turn a btrfs_inode_ref into a full filesystem path. elements
- * of the path are separated by '/' and the path is guaranteed to be
- * 0-terminated. the path is only given within the current file system.
- * Therefore, it never starts with a '/'. the caller is responsible to provide
- * "size" bytes in "dest". the dest buffer will be filled backwards. finally,
- * the start point of the resulting string is returned. this pointer is within
- * dest, normally.
- * in case the path buffer would overflow, the pointer is decremented further
- * as if output was written to the buffer, though no more output is actually
- * generated. that way, the caller can determine how much space would be
- * required for the path to fit into the buffer. in that case, the returned
- * value will be smaller than dest. callers must check this!
- */
-char *btrfs_iref_to_path(struct btrfs_root *fs_root,
-			 struct btrfs_path *path,
-			 struct btrfs_inode_ref *iref,
-			 struct extent_buffer *eb_in, u64 parent,
-			 char *dest, u32 size)
-{
-	return btrfs_ref_to_path(fs_root, path,
-				 btrfs_inode_ref_name_len(eb_in, iref),
-				 (unsigned long)(iref + 1),
-				 eb_in, parent, dest, size);
-}
-
-/*
  * this makes the path point to (logical EXTENT_ITEM *)
  * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for
  * tree blocks and <0 on error.
@@ -1461,8 +1458,6 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 				iterate_extent_inodes_t *iterate, void *ctx)
 {
 	int ret;
-	struct list_head data_refs = LIST_HEAD_INIT(data_refs);
-	struct list_head shared_refs = LIST_HEAD_INIT(shared_refs);
 	struct btrfs_trans_handle *trans;
 	struct ulist *refs = NULL;
 	struct ulist *roots = NULL;
@@ -1508,11 +1503,9 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 						iterate, ctx);
 		}
 		ulist_free(roots);
-		roots = NULL;
 	}
 
 	free_leaf_list(refs);
-	ulist_free(roots);
 out:
 	if (!search_commit_root) {
 		btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 310a7f6..0f446d7 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -59,9 +59,6 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
 				struct btrfs_fs_info *fs_info, u64 bytenr,
 				u64 time_seq, struct ulist **roots);
-char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
-			 struct btrfs_inode_ref *iref, struct extent_buffer *eb,
-			 u64 parent, char *dest, u32 size);
 char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 			u32 name_len, unsigned long name_off,
 			struct extent_buffer *eb_in, u64 parent,
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index d9b97d4..08b286b 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -93,7 +93,7 @@ struct btrfs_inode {
 
 	unsigned long runtime_flags;
 
-	/* Keep track of who's O_SYNC/fsycing currently */
+	/* Keep track of who's O_SYNC/fsyncing currently */
 	atomic_t sync_writers;
 
 	/* full 64 bit generation number, struct vfs_inode doesn't have a big
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 15b9408..b189bd1 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -82,6 +82,10 @@ struct compressed_bio {
 	u32 sums;
 };
 
+static int btrfs_decompress_biovec(int type, struct page **pages_in,
+				   u64 disk_start, struct bio_vec *bvec,
+				   int vcnt, size_t srclen);
+
 static inline int compressed_bio_size(struct btrfs_root *root,
 				      unsigned long disk_size)
 {
@@ -106,7 +110,6 @@ static int check_compressed_csum(struct inode *inode,
 				 u64 disk_start)
 {
 	int ret;
-	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct page *page;
 	unsigned long i;
 	char *kaddr;
@@ -121,7 +124,7 @@ static int check_compressed_csum(struct inode *inode,
 		csum = ~(u32)0;
 
 		kaddr = kmap_atomic(page);
-		csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE);
+		csum = btrfs_csum_data(kaddr, csum, PAGE_CACHE_SIZE);
 		btrfs_csum_final(csum, (char *)&csum);
 		kunmap_atomic(kaddr);
 
@@ -739,7 +742,7 @@ static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
 static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
 static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
 
-struct btrfs_compress_op *btrfs_compress_op[] = {
+static struct btrfs_compress_op *btrfs_compress_op[] = {
 	&btrfs_zlib_compress,
 	&btrfs_lzo_compress,
 };
@@ -910,8 +913,9 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
  * be contiguous.  They all correspond to the range of bytes covered by
  * the compressed extent.
  */
-int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
-			    struct bio_vec *bvec, int vcnt, size_t srclen)
+static int btrfs_decompress_biovec(int type, struct page **pages_in,
+				   u64 disk_start, struct bio_vec *bvec,
+				   int vcnt, size_t srclen)
 {
 	struct list_head *workspace;
 	int ret;
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 9afb0a6..0c803b4 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -30,8 +30,6 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
 			 unsigned long *total_in,
 			 unsigned long *total_out,
 			 unsigned long max_out);
-int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
-			    struct bio_vec *bvec, int vcnt, size_t srclen);
 int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
 		     unsigned long start_byte, size_t srclen, size_t destlen);
 int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ca9d8f1..de6de8e 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -37,16 +37,11 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      struct extent_buffer *dst_buf,
 			      struct extent_buffer *src_buf);
-static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		    struct btrfs_path *path, int level, int slot);
+static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
+		    int level, int slot);
 static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
 				 struct extent_buffer *eb);
-struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr,
-					  u32 blocksize, u64 parent_transid,
-					  u64 time_seq);
-struct extent_buffer *btrfs_find_old_tree_block(struct btrfs_root *root,
-						u64 bytenr, u32 blocksize,
-						u64 time_seq);
+static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
 
 struct btrfs_path *btrfs_alloc_path(void)
 {
@@ -208,7 +203,7 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
  * tree until you end up with a lock on the root.  A locked buffer
  * is returned, with a reference held.
  */
-struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
+static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
 {
 	struct extent_buffer *eb;
 
@@ -361,6 +356,44 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
 }
 
 /*
+ * Increment the upper half of tree_mod_seq, set lower half zero.
+ *
+ * Must be called with fs_info->tree_mod_seq_lock held.
+ */
+static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info)
+{
+	u64 seq = atomic64_read(&fs_info->tree_mod_seq);
+	seq &= 0xffffffff00000000ull;
+	seq += 1ull << 32;
+	atomic64_set(&fs_info->tree_mod_seq, seq);
+	return seq;
+}
+
+/*
+ * Increment the lower half of tree_mod_seq.
+ *
+ * Must be called with fs_info->tree_mod_seq_lock held. The way major numbers
+ * are generated should not technically require a spin lock here. (Rationale:
+ * incrementing the minor while incrementing the major seq number is between its
+ * atomic64_read and atomic64_set calls doesn't duplicate sequence numbers, it
+ * just returns a unique sequence number as usual.) We have decided to leave
+ * that requirement in here and rethink it once we notice it really imposes a
+ * problem on some workload.
+ */
+static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info)
+{
+	return atomic64_inc_return(&fs_info->tree_mod_seq);
+}
+
+/*
+ * return the last minor in the previous major tree_mod_seq number
+ */
+u64 btrfs_tree_mod_seq_prev(u64 seq)
+{
+	return (seq & 0xffffffff00000000ull) - 1ull;
+}
+
+/*
  * This adds a new blocker to the tree mod log's blocker list if the @elem
  * passed does not already have a sequence number set. So when a caller expects
  * to record tree modifications, it should ensure to set elem->seq to zero
@@ -376,10 +409,10 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
 	tree_mod_log_write_lock(fs_info);
 	spin_lock(&fs_info->tree_mod_seq_lock);
 	if (!elem->seq) {
-		elem->seq = btrfs_inc_tree_mod_seq(fs_info);
+		elem->seq = btrfs_inc_tree_mod_seq_major(fs_info);
 		list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
 	}
-	seq = btrfs_inc_tree_mod_seq(fs_info);
+	seq = btrfs_inc_tree_mod_seq_minor(fs_info);
 	spin_unlock(&fs_info->tree_mod_seq_lock);
 	tree_mod_log_write_unlock(fs_info);
 
@@ -524,7 +557,10 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
 	if (!tm)
 		return -ENOMEM;
 
-	tm->seq = btrfs_inc_tree_mod_seq(fs_info);
+	spin_lock(&fs_info->tree_mod_seq_lock);
+	tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
+	spin_unlock(&fs_info->tree_mod_seq_lock);
+
 	return tm->seq;
 }
 
@@ -643,7 +679,8 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
 static noinline int
 tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 			 struct extent_buffer *old_root,
-			 struct extent_buffer *new_root, gfp_t flags)
+			 struct extent_buffer *new_root, gfp_t flags,
+			 int log_removal)
 {
 	struct tree_mod_elem *tm;
 	int ret;
@@ -651,7 +688,8 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 	if (tree_mod_dont_log(fs_info, NULL))
 		return 0;
 
-	__tree_mod_log_free_eb(fs_info, old_root);
+	if (log_removal)
+		__tree_mod_log_free_eb(fs_info, old_root);
 
 	ret = tree_mod_alloc(fs_info, flags, &tm);
 	if (ret < 0)
@@ -738,7 +776,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
 static noinline void
 tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
 		     struct extent_buffer *src, unsigned long dst_offset,
-		     unsigned long src_offset, int nr_items, int log_removal)
+		     unsigned long src_offset, int nr_items)
 {
 	int ret;
 	int i;
@@ -752,12 +790,10 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
 	}
 
 	for (i = 0; i < nr_items; i++) {
-		if (log_removal) {
-			ret = tree_mod_log_insert_key_locked(fs_info, src,
-							i + src_offset,
-							MOD_LOG_KEY_REMOVE);
-			BUG_ON(ret < 0);
-		}
+		ret = tree_mod_log_insert_key_locked(fs_info, src,
+						i + src_offset,
+						MOD_LOG_KEY_REMOVE);
+		BUG_ON(ret < 0);
 		ret = tree_mod_log_insert_key_locked(fs_info, dst,
 						     i + dst_offset,
 						     MOD_LOG_KEY_ADD);
@@ -802,11 +838,12 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
 
 static noinline void
 tree_mod_log_set_root_pointer(struct btrfs_root *root,
-			      struct extent_buffer *new_root_node)
+			      struct extent_buffer *new_root_node,
+			      int log_removal)
 {
 	int ret;
 	ret = tree_mod_log_insert_root(root->fs_info, root->node,
-				       new_root_node, GFP_NOFS);
+				       new_root_node, GFP_NOFS, log_removal);
 	BUG_ON(ret < 0);
 }
 
@@ -867,7 +904,8 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 
 	if (btrfs_block_can_be_shared(root, buf)) {
 		ret = btrfs_lookup_extent_info(trans, root, buf->start,
-					       buf->len, &refs, &flags);
+					       btrfs_header_level(buf), 1,
+					       &refs, &flags);
 		if (ret)
 			return ret;
 		if (refs == 0) {
@@ -1028,7 +1066,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 			parent_start = 0;
 
 		extent_buffer_get(cow);
-		tree_mod_log_set_root_pointer(root, cow);
+		tree_mod_log_set_root_pointer(root, cow, 1);
 		rcu_assign_pointer(root->node, cow);
 
 		btrfs_free_tree_block(trans, root, buf, parent_start,
@@ -1067,11 +1105,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
  */
 static struct tree_mod_elem *
 __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
-			   struct btrfs_root *root, u64 time_seq)
+			   struct extent_buffer *eb_root, u64 time_seq)
 {
 	struct tree_mod_elem *tm;
 	struct tree_mod_elem *found = NULL;
-	u64 root_logical = root->node->start;
+	u64 root_logical = eb_root->start;
 	int looped = 0;
 
 	if (!time_seq)
@@ -1105,7 +1143,6 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
 
 		found = tm;
 		root_logical = tm->old_root.logical;
-		BUG_ON(root_logical == root->node->start);
 		looped = 1;
 	}
 
@@ -1190,6 +1227,13 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
 	btrfs_set_header_nritems(eb, n);
 }
 
+/*
+ * Called with eb read locked. If the buffer cannot be rewinded, the same buffer
+ * is returned. If rewind operations happen, a fresh buffer is returned. The
+ * returned buffer is always read-locked. If the returned buffer is not the
+ * input buffer, the lock on the input buffer is released and the input buffer
+ * is freed (its refcount is decremented).
+ */
 static struct extent_buffer *
 tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
 		    u64 time_seq)
@@ -1223,8 +1267,11 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
 	}
 
 	extent_buffer_get(eb_rewin);
+	btrfs_tree_read_unlock(eb);
 	free_extent_buffer(eb);
 
+	extent_buffer_get(eb_rewin);
+	btrfs_tree_read_lock(eb_rewin);
 	__tree_mod_log_rewind(eb_rewin, time_seq, tm);
 	WARN_ON(btrfs_header_nritems(eb_rewin) >
 		BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root));
@@ -1243,33 +1290,35 @@ static inline struct extent_buffer *
 get_old_root(struct btrfs_root *root, u64 time_seq)
 {
 	struct tree_mod_elem *tm;
-	struct extent_buffer *eb;
+	struct extent_buffer *eb = NULL;
+	struct extent_buffer *eb_root;
 	struct extent_buffer *old;
 	struct tree_mod_root *old_root = NULL;
 	u64 old_generation = 0;
 	u64 logical;
 	u32 blocksize;
 
-	eb = btrfs_read_lock_root_node(root);
-	tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
+	eb_root = btrfs_read_lock_root_node(root);
+	tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq);
 	if (!tm)
-		return root->node;
+		return eb_root;
 
 	if (tm->op == MOD_LOG_ROOT_REPLACE) {
 		old_root = &tm->old_root;
 		old_generation = tm->generation;
 		logical = old_root->logical;
 	} else {
-		logical = root->node->start;
+		logical = eb_root->start;
 	}
 
 	tm = tree_mod_log_search(root->fs_info, logical, time_seq);
 	if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
-		btrfs_tree_read_unlock(root->node);
-		free_extent_buffer(root->node);
+		btrfs_tree_read_unlock(eb_root);
+		free_extent_buffer(eb_root);
 		blocksize = btrfs_level_size(root, old_root->level);
 		old = read_tree_block(root, logical, blocksize, 0);
-		if (!old) {
+		if (!old || !extent_buffer_uptodate(old)) {
+			free_extent_buffer(old);
 			pr_warn("btrfs: failed to read tree block %llu from get_old_root\n",
 				logical);
 			WARN_ON(1);
@@ -1278,13 +1327,13 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
 			free_extent_buffer(old);
 		}
 	} else if (old_root) {
-		btrfs_tree_read_unlock(root->node);
-		free_extent_buffer(root->node);
+		btrfs_tree_read_unlock(eb_root);
+		free_extent_buffer(eb_root);
 		eb = alloc_dummy_extent_buffer(logical, root->nodesize);
 	} else {
-		eb = btrfs_clone_extent_buffer(root->node);
-		btrfs_tree_read_unlock(root->node);
-		free_extent_buffer(root->node);
+		eb = btrfs_clone_extent_buffer(eb_root);
+		btrfs_tree_read_unlock(eb_root);
+		free_extent_buffer(eb_root);
 	}
 
 	if (!eb)
@@ -1294,7 +1343,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
 	if (old_root) {
 		btrfs_set_header_bytenr(eb, eb->start);
 		btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
-		btrfs_set_header_owner(eb, root->root_key.objectid);
+		btrfs_set_header_owner(eb, btrfs_header_owner(eb_root));
 		btrfs_set_header_level(eb, old_root->level);
 		btrfs_set_header_generation(eb, old_generation);
 	}
@@ -1311,15 +1360,15 @@ int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq)
 {
 	struct tree_mod_elem *tm;
 	int level;
+	struct extent_buffer *eb_root = btrfs_root_node(root);
 
-	tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
+	tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq);
 	if (tm && tm->op == MOD_LOG_ROOT_REPLACE) {
 		level = tm->old_root.level;
 	} else {
-		rcu_read_lock();
-		level = btrfs_header_level(root->node);
-		rcu_read_unlock();
+		level = btrfs_header_level(eb_root);
 	}
+	free_extent_buffer(eb_root);
 
 	return level;
 }
@@ -1514,8 +1563,10 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 			if (!cur) {
 				cur = read_tree_block(root, blocknr,
 							 blocksize, gen);
-				if (!cur)
+				if (!cur || !extent_buffer_uptodate(cur)) {
+					free_extent_buffer(cur);
 					return -EIO;
+				}
 			} else if (!uptodate) {
 				err = btrfs_read_buffer(cur, gen);
 				if (err) {
@@ -1680,6 +1731,8 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
 				   struct extent_buffer *parent, int slot)
 {
 	int level = btrfs_header_level(parent);
+	struct extent_buffer *eb;
+
 	if (slot < 0)
 		return NULL;
 	if (slot >= btrfs_header_nritems(parent))
@@ -1687,9 +1740,15 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
 
 	BUG_ON(level == 0);
 
-	return read_tree_block(root, btrfs_node_blockptr(parent, slot),
-		       btrfs_level_size(root, level - 1),
-		       btrfs_node_ptr_generation(parent, slot));
+	eb = read_tree_block(root, btrfs_node_blockptr(parent, slot),
+			     btrfs_level_size(root, level - 1),
+			     btrfs_node_ptr_generation(parent, slot));
+	if (eb && !extent_buffer_uptodate(eb)) {
+		free_extent_buffer(eb);
+		eb = NULL;
+	}
+
+	return eb;
 }
 
 /*
@@ -1754,7 +1813,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 			goto enospc;
 		}
 
-		tree_mod_log_set_root_pointer(root, child);
+		tree_mod_log_set_root_pointer(root, child, 1);
 		rcu_assign_pointer(root->node, child);
 
 		add_root_to_dirty_list(root);
@@ -1818,7 +1877,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		if (btrfs_header_nritems(right) == 0) {
 			clean_tree_block(trans, root, right);
 			btrfs_tree_unlock(right);
-			del_ptr(trans, root, path, level + 1, pslot + 1);
+			del_ptr(root, path, level + 1, pslot + 1);
 			root_sub_used(root, right->len);
 			btrfs_free_tree_block(trans, root, right, 0, 1);
 			free_extent_buffer_stale(right);
@@ -1862,7 +1921,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 	if (btrfs_header_nritems(mid) == 0) {
 		clean_tree_block(trans, root, mid);
 		btrfs_tree_unlock(mid);
-		del_ptr(trans, root, path, level + 1, pslot);
+		del_ptr(root, path, level + 1, pslot);
 		root_sub_used(root, mid->len);
 		btrfs_free_tree_block(trans, root, mid, 0, 1);
 		free_extent_buffer_stale(mid);
@@ -2210,9 +2269,6 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
 	int no_skips = 0;
 	struct extent_buffer *t;
 
-	if (path->really_keep_locks)
-		return;
-
 	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
 		if (!path->nodes[i])
 			break;
@@ -2260,7 +2316,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
 {
 	int i;
 
-	if (path->keep_locks || path->really_keep_locks)
+	if (path->keep_locks)
 		return;
 
 	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
@@ -2493,7 +2549,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	if (!cow)
 		write_lock_level = -1;
 
-	if (cow && (p->really_keep_locks || p->keep_locks || p->lowest_level))
+	if (cow && (p->keep_locks || p->lowest_level))
 		write_lock_level = BTRFS_MAX_LEVEL;
 
 	min_write_lock_level = write_lock_level;
@@ -2795,15 +2851,9 @@ again:
 				btrfs_clear_path_blocking(p, b,
 							  BTRFS_READ_LOCK);
 			}
+			b = tree_mod_log_rewind(root->fs_info, b, time_seq);
 			p->locks[level] = BTRFS_READ_LOCK;
 			p->nodes[level] = b;
-			b = tree_mod_log_rewind(root->fs_info, b, time_seq);
-			if (b != p->nodes[level]) {
-				btrfs_tree_unlock_rw(p->nodes[level],
-						     p->locks[level]);
-				p->locks[level] = 0;
-				p->nodes[level] = b;
-			}
 		} else {
 			p->slots[level] = slot;
 			unlock_up(p, level, lowest_unlock, 0, NULL);
@@ -2902,8 +2952,7 @@ again:
  * higher levels
  *
  */
-static void fixup_low_keys(struct btrfs_trans_handle *trans,
-			   struct btrfs_root *root, struct btrfs_path *path,
+static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
 			   struct btrfs_disk_key *key, int level)
 {
 	int i;
@@ -2928,8 +2977,7 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans,
  * This function isn't completely safe. It's the caller's responsibility
  * that the new key won't break the order
  */
-void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
-			     struct btrfs_root *root, struct btrfs_path *path,
+void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
 			     struct btrfs_key *new_key)
 {
 	struct btrfs_disk_key disk_key;
@@ -2951,7 +2999,7 @@ void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
 	btrfs_set_item_key(eb, &disk_key, slot);
 	btrfs_mark_buffer_dirty(eb);
 	if (slot == 0)
-		fixup_low_keys(trans, root, path, &disk_key, 1);
+		fixup_low_keys(root, path, &disk_key, 1);
 }
 
 /*
@@ -2998,7 +3046,7 @@ static int push_node_left(struct btrfs_trans_handle *trans,
 		push_items = min(src_nritems - 8, push_items);
 
 	tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
-			     push_items, 1);
+			     push_items);
 	copy_extent_buffer(dst, src,
 			   btrfs_node_key_ptr_offset(dst_nritems),
 			   btrfs_node_key_ptr_offset(0),
@@ -3069,7 +3117,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
 				      sizeof(struct btrfs_key_ptr));
 
 	tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
-			     src_nritems - push_items, push_items, 1);
+			     src_nritems - push_items, push_items);
 	copy_extent_buffer(dst, src,
 			   btrfs_node_key_ptr_offset(0),
 			   btrfs_node_key_ptr_offset(src_nritems - push_items),
@@ -3093,7 +3141,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
  */
 static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
-			   struct btrfs_path *path, int level)
+			   struct btrfs_path *path, int level, int log_removal)
 {
 	u64 lower_gen;
 	struct extent_buffer *lower;
@@ -3144,7 +3192,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(c);
 
 	old = root->node;
-	tree_mod_log_set_root_pointer(root, c);
+	tree_mod_log_set_root_pointer(root, c, log_removal);
 	rcu_assign_pointer(root->node, c);
 
 	/* the super has an extra ref to root->node */
@@ -3221,18 +3269,21 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 	int mid;
 	int ret;
 	u32 c_nritems;
-	int tree_mod_log_removal = 1;
 
 	c = path->nodes[level];
 	WARN_ON(btrfs_header_generation(c) != trans->transid);
 	if (c == root->node) {
-		/* trying to split the root, lets make a new one */
-		ret = insert_new_root(trans, root, path, level + 1);
 		/*
-		 * removal of root nodes has been logged by
-		 * tree_mod_log_set_root_pointer due to locking
+		 * trying to split the root, lets make a new one
+		 *
+		 * tree mod log: We pass 0 as log_removal parameter to
+		 * insert_new_root, because that root buffer will be kept as a
+		 * normal node. We are going to log removal of half of the
+		 * elements below with tree_mod_log_eb_copy. We're holding a
+		 * tree lock on the buffer, which is why we cannot race with
+		 * other tree_mod_log users.
 		 */
-		tree_mod_log_removal = 0;
+		ret = insert_new_root(trans, root, path, level + 1, 0);
 		if (ret)
 			return ret;
 	} else {
@@ -3270,8 +3321,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 			    (unsigned long)btrfs_header_chunk_tree_uuid(split),
 			    BTRFS_UUID_SIZE);
 
-	tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid,
-			     tree_mod_log_removal);
+	tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid);
 	copy_extent_buffer(split, c,
 			   btrfs_node_key_ptr_offset(0),
 			   btrfs_node_key_ptr_offset(mid),
@@ -3687,7 +3737,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 		clean_tree_block(trans, root, right);
 
 	btrfs_item_key(right, &disk_key, 0);
-	fixup_low_keys(trans, root, path, &disk_key, 1);
+	fixup_low_keys(root, path, &disk_key, 1);
 
 	/* then fixup the leaf pointer in the path */
 	if (path->slots[0] < push_items) {
@@ -3953,7 +4003,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 	}
 
 	if (!path->nodes[1]) {
-		ret = insert_new_root(trans, root, path, 1);
+		ret = insert_new_root(trans, root, path, 1, 1);
 		if (ret)
 			return ret;
 	}
@@ -4047,8 +4097,7 @@ again:
 			path->nodes[0] = right;
 			path->slots[0] = 0;
 			if (path->slots[1] == 0)
-				fixup_low_keys(trans, root, path,
-					       &disk_key, 1);
+				fixup_low_keys(root, path, &disk_key, 1);
 		}
 		btrfs_mark_buffer_dirty(right);
 		return ret;
@@ -4264,7 +4313,7 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
 		return ret;
 
 	path->slots[0]++;
-	setup_items_for_insert(trans, root, path, new_key, &item_size,
+	setup_items_for_insert(root, path, new_key, &item_size,
 			       item_size, item_size +
 			       sizeof(struct btrfs_item), 1);
 	leaf = path->nodes[0];
@@ -4281,9 +4330,7 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
  * off the end of the item or if we shift the item to chop bytes off
  * the front.
  */
-void btrfs_truncate_item(struct btrfs_trans_handle *trans,
-			 struct btrfs_root *root,
-			 struct btrfs_path *path,
+void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
 			 u32 new_size, int from_end)
 {
 	int slot;
@@ -4367,7 +4414,7 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
 		btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
 		btrfs_set_item_key(leaf, &disk_key, slot);
 		if (slot == 0)
-			fixup_low_keys(trans, root, path, &disk_key, 1);
+			fixup_low_keys(root, path, &disk_key, 1);
 	}
 
 	item = btrfs_item_nr(leaf, slot);
@@ -4383,8 +4430,7 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
 /*
  * make the item pointed to by the path bigger, data_size is the new size.
  */
-void btrfs_extend_item(struct btrfs_trans_handle *trans,
-		       struct btrfs_root *root, struct btrfs_path *path,
+void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
 		       u32 data_size)
 {
 	int slot;
@@ -4454,8 +4500,7 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
  * to save stack depth by doing the bulk of the work in a function
  * that doesn't call btrfs_search_slot
  */
-void setup_items_for_insert(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, struct btrfs_path *path,
+void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
 			    struct btrfs_key *cpu_key, u32 *data_size,
 			    u32 total_data, u32 total_size, int nr)
 {
@@ -4531,7 +4576,7 @@ void setup_items_for_insert(struct btrfs_trans_handle *trans,
 
 	if (slot == 0) {
 		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
-		fixup_low_keys(trans, root, path, &disk_key, 1);
+		fixup_low_keys(root, path, &disk_key, 1);
 	}
 	btrfs_unlock_up_safe(path, 1);
 	btrfs_mark_buffer_dirty(leaf);
@@ -4571,7 +4616,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
 	slot = path->slots[0];
 	BUG_ON(slot < 0);
 
-	setup_items_for_insert(trans, root, path, cpu_key, data_size,
+	setup_items_for_insert(root, path, cpu_key, data_size,
 			       total_data, total_size, nr);
 	return 0;
 }
@@ -4609,8 +4654,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
  * the tree should have been previously balanced so the deletion does not
  * empty a node.
  */
-static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		    struct btrfs_path *path, int level, int slot)
+static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
+		    int level, int slot)
 {
 	struct extent_buffer *parent = path->nodes[level];
 	u32 nritems;
@@ -4642,7 +4687,7 @@ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		struct btrfs_disk_key disk_key;
 
 		btrfs_node_key(parent, &disk_key, 0);
-		fixup_low_keys(trans, root, path, &disk_key, level + 1);
+		fixup_low_keys(root, path, &disk_key, level + 1);
 	}
 	btrfs_mark_buffer_dirty(parent);
 }
@@ -4663,7 +4708,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
 				    struct extent_buffer *leaf)
 {
 	WARN_ON(btrfs_header_generation(leaf) != trans->transid);
-	del_ptr(trans, root, path, 1, path->slots[1]);
+	del_ptr(root, path, 1, path->slots[1]);
 
 	/*
 	 * btrfs_free_extent is expensive, we want to make sure we
@@ -4744,7 +4789,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 			struct btrfs_disk_key disk_key;
 
 			btrfs_item_key(leaf, &disk_key, 0);
-			fixup_low_keys(trans, root, path, &disk_key, 1);
+			fixup_low_keys(root, path, &disk_key, 1);
 		}
 
 		/* delete the leaf if it is mostly empty */
@@ -5464,139 +5509,6 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 	return btrfs_next_old_leaf(root, path, 0);
 }
 
-/* Release the path up to but not including the given level */
-static void btrfs_release_level(struct btrfs_path *path, int level)
-{
-	int i;
-
-	for (i = 0; i < level; i++) {
-		path->slots[i] = 0;
-		if (!path->nodes[i])
-			continue;
-		if (path->locks[i]) {
-			btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
-			path->locks[i] = 0;
-		}
-		free_extent_buffer(path->nodes[i]);
-		path->nodes[i] = NULL;
-	}
-}
-
-/*
- * This function assumes 2 things
- *
- * 1) You are using path->keep_locks
- * 2) You are not inserting items.
- *
- * If either of these are not true do not use this function. If you need a next
- * leaf with either of these not being true then this function can be easily
- * adapted to do that, but at the moment these are the limitations.
- */
-int btrfs_next_leaf_write(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root, struct btrfs_path *path,
-			  int del)
-{
-	struct extent_buffer *b;
-	struct btrfs_key key;
-	u32 nritems;
-	int level = 1;
-	int slot;
-	int ret = 1;
-	int write_lock_level = BTRFS_MAX_LEVEL;
-	int ins_len = del ? -1 : 0;
-
-	WARN_ON(!(path->keep_locks || path->really_keep_locks));
-
-	nritems = btrfs_header_nritems(path->nodes[0]);
-	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
-
-	while (path->nodes[level]) {
-		nritems = btrfs_header_nritems(path->nodes[level]);
-		if (!(path->locks[level] & BTRFS_WRITE_LOCK)) {
-search:
-			btrfs_release_path(path);
-			ret = btrfs_search_slot(trans, root, &key, path,
-						ins_len, 1);
-			if (ret < 0)
-				goto out;
-			level = 1;
-			continue;
-		}
-
-		if (path->slots[level] >= nritems - 1) {
-			level++;
-			continue;
-		}
-
-		btrfs_release_level(path, level);
-		break;
-	}
-
-	if (!path->nodes[level]) {
-		ret = 1;
-		goto out;
-	}
-
-	path->slots[level]++;
-	b = path->nodes[level];
-
-	while (b) {
-		level = btrfs_header_level(b);
-
-		if (!should_cow_block(trans, root, b))
-			goto cow_done;
-
-		btrfs_set_path_blocking(path);
-		ret = btrfs_cow_block(trans, root, b,
-				      path->nodes[level + 1],
-				      path->slots[level + 1], &b);
-		if (ret)
-			goto out;
-cow_done:
-		path->nodes[level] = b;
-		btrfs_clear_path_blocking(path, NULL, 0);
-		if (level != 0) {
-			ret = setup_nodes_for_search(trans, root, path, b,
-						     level, ins_len,
-						     &write_lock_level);
-			if (ret == -EAGAIN)
-				goto search;
-			if (ret)
-				goto out;
-
-			b = path->nodes[level];
-			slot = path->slots[level];
-
-			ret = read_block_for_search(trans, root, path,
-						    &b, level, slot, &key, 0);
-			if (ret == -EAGAIN)
-				goto search;
-			if (ret)
-				goto out;
-			level = btrfs_header_level(b);
-			if (!btrfs_try_tree_write_lock(b)) {
-				btrfs_set_path_blocking(path);
-				btrfs_tree_lock(b);
-				btrfs_clear_path_blocking(path, b,
-							  BTRFS_WRITE_LOCK);
-			}
-			path->locks[level] = BTRFS_WRITE_LOCK;
-			path->nodes[level] = b;
-			path->slots[level] = 0;
-		} else {
-			path->slots[level] = 0;
-			ret = 0;
-			break;
-		}
-	}
-
-out:
-	if (ret)
-		btrfs_release_path(path);
-
-	return ret;
-}
-
 int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
 			u64 time_seq)
 {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0d82922..63c328a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -340,6 +340,7 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
  */
 #define BTRFS_FS_STATE_ERROR		0
 #define BTRFS_FS_STATE_REMOUNTING	1
+#define BTRFS_FS_STATE_TRANS_ABORTED	2
 
 /* Super block flags */
 /* Errors detected */
@@ -508,6 +509,7 @@ struct btrfs_super_block {
 
 #define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF	(1ULL << 6)
 #define BTRFS_FEATURE_INCOMPAT_RAID56		(1ULL << 7)
+#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA	(1ULL << 8)
 
 #define BTRFS_FEATURE_COMPAT_SUPP		0ULL
 #define BTRFS_FEATURE_COMPAT_RO_SUPP		0ULL
@@ -518,7 +520,8 @@ struct btrfs_super_block {
 	 BTRFS_FEATURE_INCOMPAT_BIG_METADATA |		\
 	 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO |		\
 	 BTRFS_FEATURE_INCOMPAT_RAID56 |		\
-	 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
+	 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF |		\
+	 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
 
 /*
  * A leaf is full of items. offset and size tell us where to find
@@ -583,7 +586,6 @@ struct btrfs_path {
 	unsigned int skip_locking:1;
 	unsigned int leave_spinning:1;
 	unsigned int search_commit_root:1;
-	unsigned int really_keep_locks:1;
 };
 
 /*
@@ -1019,9 +1021,9 @@ struct btrfs_block_group_item {
  */
 #define BTRFS_QGROUP_STATUS_FLAG_ON		(1ULL << 0)
 /*
- * SCANNING is set during the initialization phase
+ * RESCAN is set during the initialization phase
  */
-#define BTRFS_QGROUP_STATUS_FLAG_SCANNING	(1ULL << 1)
+#define BTRFS_QGROUP_STATUS_FLAG_RESCAN		(1ULL << 1)
 /*
  * Some qgroup entries are known to be out of date,
  * either because the configuration has changed in a way that
@@ -1050,7 +1052,7 @@ struct btrfs_qgroup_status_item {
 	 * only used during scanning to record the progress
 	 * of the scan. It contains a logical address
 	 */
-	__le64 scan;
+	__le64 rescan;
 } __attribute__ ((__packed__));
 
 struct btrfs_qgroup_info_item {
@@ -1360,6 +1362,17 @@ struct btrfs_fs_info {
 	wait_queue_head_t transaction_blocked_wait;
 	wait_queue_head_t async_submit_wait;
 
+	/*
+	 * Used to protect the incompat_flags, compat_flags, compat_ro_flags
+	 * when they are updated.
+	 *
+	 * Because we do not clear the flags for ever, so we needn't use
+	 * the lock on the read side.
+	 *
+	 * We also needn't use the lock when we mount the fs, because
+	 * there is no other task which will update the flag.
+	 */
+	spinlock_t super_lock;
 	struct btrfs_super_block *super_copy;
 	struct btrfs_super_block *super_for_commit;
 	struct block_device *__bdev;
@@ -1409,7 +1422,7 @@ struct btrfs_fs_info {
 
 	/* this protects tree_mod_seq_list */
 	spinlock_t tree_mod_seq_lock;
-	atomic_t tree_mod_seq;
+	atomic64_t tree_mod_seq;
 	struct list_head tree_mod_seq_list;
 	struct seq_list tree_mod_seq_elem;
 
@@ -1581,12 +1594,20 @@ struct btrfs_fs_info {
 	struct rb_root qgroup_tree;
 	spinlock_t qgroup_lock;
 
+	/* protect user change for quota operations */
+	struct mutex qgroup_ioctl_lock;
+
 	/* list of dirty qgroups to be written at next commit */
 	struct list_head dirty_qgroups;
 
 	/* used by btrfs_qgroup_record_ref for an efficient tree traversal */
 	u64 qgroup_seq;
 
+	/* qgroup rescan items */
+	struct mutex qgroup_rescan_lock; /* protects the progress item */
+	struct btrfs_key qgroup_rescan_progress;
+	struct btrfs_workers qgroup_rescan_workers;
+
 	/* filesystem state */
 	unsigned long fs_state;
 
@@ -1808,6 +1829,12 @@ struct btrfs_ioctl_defrag_range_args {
  */
 #define BTRFS_EXTENT_ITEM_KEY	168
 
+/*
+ * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know
+ * the length, so we save the level in key->offset instead of the length.
+ */
+#define BTRFS_METADATA_ITEM_KEY	169
+
 #define BTRFS_TREE_BLOCK_REF_KEY	176
 
 #define BTRFS_EXTENT_DATA_REF_KEY	178
@@ -2766,8 +2793,10 @@ BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
 
 static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
 {
-	int t = btrfs_super_csum_type(s);
-	BUG_ON(t >= ARRAY_SIZE(btrfs_csum_sizes));
+	u16 t = btrfs_super_csum_type(s);
+	/*
+	 * csum type is validated at mount time
+	 */
 	return btrfs_csum_sizes[t];
 }
 
@@ -2864,8 +2893,8 @@ BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item,
 		   version, 64);
 BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item,
 		   flags, 64);
-BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item,
-		   scan, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_rescan, struct btrfs_qgroup_status_item,
+		   rescan, 64);
 
 /* btrfs_qgroup_info_item */
 BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item,
@@ -3005,7 +3034,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root, u64 bytenr,
-			     u64 num_bytes, u64 *refs, u64 *flags);
+			     u64 offset, int metadata, u64 *refs, u64 *flags);
 int btrfs_pin_extent(struct btrfs_root *root,
 		     u64 bytenr, u64 num, int reserved);
 int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
@@ -3017,8 +3046,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
 						 struct btrfs_fs_info *info,
 						 u64 bytenr);
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
-u64 btrfs_find_block_group(struct btrfs_root *root,
-			   u64 search_start, u64 search_hint, int owner);
 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
 					struct btrfs_root *root, u32 blocksize,
 					u64 parent, u64 root_objectid,
@@ -3028,10 +3055,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
 			   struct extent_buffer *buf,
 			   u64 parent, int last_ref);
-struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
-					    struct btrfs_root *root,
-					    u64 bytenr, u32 blocksize,
-					    int level);
 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 				     struct btrfs_root *root,
 				     u64 root_objectid, u64 owner,
@@ -3044,7 +3067,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 				  struct btrfs_root *root,
 				  u64 num_bytes, u64 min_alloc_size,
 				  u64 empty_size, u64 hint_byte,
-				  struct btrfs_key *ins, u64 data);
+				  struct btrfs_key *ins, int is_data);
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		  struct extent_buffer *buf, int full_backref, int for_cow);
 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -3084,7 +3107,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root, u64 group_start);
 void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
 				       struct btrfs_root *root);
-u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
 u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
 void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
 
@@ -3161,8 +3183,7 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
 int btrfs_previous_item(struct btrfs_root *root,
 			struct btrfs_path *path, u64 min_objectid,
 			int type);
-void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
-			     struct btrfs_root *root, struct btrfs_path *path,
+void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
 			     struct btrfs_key *new_key);
 struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
@@ -3198,12 +3219,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
 		      struct extent_buffer **cow_ret, u64 new_root_objectid);
 int btrfs_block_can_be_shared(struct btrfs_root *root,
 			      struct extent_buffer *buf);
-void btrfs_extend_item(struct btrfs_trans_handle *trans,
-		       struct btrfs_root *root, struct btrfs_path *path,
+void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
 		       u32 data_size);
-void btrfs_truncate_item(struct btrfs_trans_handle *trans,
-			 struct btrfs_root *root,
-			 struct btrfs_path *path,
+void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
 			 u32 new_size, int from_end);
 int btrfs_split_item(struct btrfs_trans_handle *trans,
 		     struct btrfs_root *root,
@@ -3243,8 +3261,7 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
 	return btrfs_del_items(trans, root, path, path->slots[0], 1);
 }
 
-void setup_items_for_insert(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, struct btrfs_path *path,
+void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
 			    struct btrfs_key *cpu_key, u32 *data_size,
 			    u32 total_data, u32 total_size, int nr);
 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
@@ -3264,9 +3281,6 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
 }
 
 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
-int btrfs_next_leaf_write(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root, struct btrfs_path *path,
-			  int del);
 int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
 			u64 time_seq);
 static inline int btrfs_next_old_item(struct btrfs_root *root,
@@ -3281,7 +3295,6 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
 {
 	return btrfs_next_old_item(root, p, 0);
 }
-int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
 int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
 				     struct btrfs_block_rsv *block_rsv,
@@ -3318,10 +3331,7 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
 			   struct seq_list *elem);
 void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
 			    struct seq_list *elem);
-static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
-{
-	return atomic_inc_return(&fs_info->tree_mod_seq);
-}
+u64 btrfs_tree_mod_seq_prev(u64 seq);
 int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
 
 /* root-item.c */
@@ -3345,9 +3355,8 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root,
 				   struct btrfs_key *key,
 				   struct btrfs_root_item *item);
-void btrfs_read_root_item(struct btrfs_root *root,
-			 struct extent_buffer *eb, int slot,
-			 struct btrfs_root_item *item);
+void btrfs_read_root_item(struct extent_buffer *eb, int slot,
+			  struct btrfs_root_item *item);
 int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
 			 btrfs_root_item *item, struct btrfs_key *key);
 int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
@@ -3380,9 +3389,6 @@ struct btrfs_dir_item *
 btrfs_search_dir_index_item(struct btrfs_root *root,
 			    struct btrfs_path *path, u64 dirid,
 			    const char *name, int name_len);
-struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
-			      struct btrfs_path *path,
-			      const char *name, int name_len);
 int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      struct btrfs_path *path,
@@ -3460,16 +3466,11 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
 			     struct btrfs_path *path, u64 objectid,
 			     u64 bytenr, int mod);
-u64 btrfs_file_extent_length(struct btrfs_path *path);
 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
 			   struct btrfs_ordered_sum *sums);
 int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
 		       struct bio *bio, u64 file_start, int contig);
-struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
-					  struct btrfs_root *root,
-					  struct btrfs_path *path,
-					  u64 bytenr, int cow);
 int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, struct btrfs_path *path,
 			u64 isize);
@@ -3531,8 +3532,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
 			      struct extent_state **cached_state);
-int btrfs_writepages(struct address_space *mapping,
-		     struct writeback_control *wbc);
 int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *new_root, u64 new_dirid);
 int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
@@ -3542,7 +3541,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_evict_inode(struct inode *inode);
 int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
-int btrfs_dirty_inode(struct inode *inode);
 struct inode *btrfs_alloc_inode(struct super_block *sb);
 void btrfs_destroy_inode(struct inode *inode);
 int btrfs_drop_inode(struct inode *inode);
@@ -3560,7 +3558,6 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans,
 int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root, struct inode *inode);
 int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
-int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
 int btrfs_orphan_cleanup(struct btrfs_root *root);
 void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root);
@@ -3611,7 +3608,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
 int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
 			      struct inode *inode, u64 start, u64 end);
 int btrfs_release_file(struct inode *inode, struct file *file);
-void btrfs_drop_pages(struct page **pages, size_t num_pages);
 int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
 		      struct page **pages, size_t num_pages,
 		      loff_t pos, size_t write_bytes,
@@ -3634,14 +3630,31 @@ int btrfs_sync_fs(struct super_block *sb, int wait);
 
 #ifdef CONFIG_PRINTK
 __printf(2, 3)
-void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...);
+void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...);
 #else
 static inline __printf(2, 3)
-void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...)
+void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
 {
 }
 #endif
 
+#define btrfs_emerg(fs_info, fmt, args...) \
+	btrfs_printk(fs_info, KERN_EMERG fmt, ##args)
+#define btrfs_alert(fs_info, fmt, args...) \
+	btrfs_printk(fs_info, KERN_ALERT fmt, ##args)
+#define btrfs_crit(fs_info, fmt, args...) \
+	btrfs_printk(fs_info, KERN_CRIT fmt, ##args)
+#define btrfs_err(fs_info, fmt, args...) \
+	btrfs_printk(fs_info, KERN_ERR fmt, ##args)
+#define btrfs_warn(fs_info, fmt, args...) \
+	btrfs_printk(fs_info, KERN_WARNING fmt, ##args)
+#define btrfs_notice(fs_info, fmt, args...) \
+	btrfs_printk(fs_info, KERN_NOTICE fmt, ##args)
+#define btrfs_info(fs_info, fmt, args...) \
+	btrfs_printk(fs_info, KERN_INFO fmt, ##args)
+#define btrfs_debug(fs_info, fmt, args...) \
+	btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
+
 __printf(5, 6)
 void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
 		     unsigned int line, int errno, const char *fmt, ...);
@@ -3663,11 +3676,28 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
 	disk_super = fs_info->super_copy;
 	features = btrfs_super_incompat_flags(disk_super);
 	if (!(features & flag)) {
-		features |= flag;
-		btrfs_set_super_incompat_flags(disk_super, features);
+		spin_lock(&fs_info->super_lock);
+		features = btrfs_super_incompat_flags(disk_super);
+		if (!(features & flag)) {
+			features |= flag;
+			btrfs_set_super_incompat_flags(disk_super, features);
+			printk(KERN_INFO "btrfs: setting %llu feature flag\n",
+					 flag);
+		}
+		spin_unlock(&fs_info->super_lock);
 	}
 }
 
+#define btrfs_fs_incompat(fs_info, opt) \
+	__btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
+
+static inline int __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
+{
+	struct btrfs_super_block *disk_super;
+	disk_super = fs_info->super_copy;
+	return !!(btrfs_super_incompat_flags(disk_super) & flag);
+}
+
 /*
  * Call btrfs_abort_transaction as early as possible when an error condition is
  * detected, that way the exact line number is reported.
@@ -3753,7 +3783,6 @@ void btrfs_scrub_continue_super(struct btrfs_root *root);
 int btrfs_scrub_cancel(struct btrfs_fs_info *info);
 int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
 			   struct btrfs_device *dev);
-int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid);
 int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
 			 struct btrfs_scrub_progress *progress);
 
@@ -3784,7 +3813,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 		       struct btrfs_fs_info *fs_info);
 int btrfs_quota_disable(struct btrfs_trans_handle *trans,
 			struct btrfs_fs_info *fs_info);
-int btrfs_quota_rescan(struct btrfs_fs_info *fs_info);
+int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 14fce27..f26f38c 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -202,7 +202,7 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
 	spin_unlock(&root->lock);
 }
 
-struct btrfs_delayed_node *btrfs_first_delayed_node(
+static struct btrfs_delayed_node *btrfs_first_delayed_node(
 			struct btrfs_delayed_root *delayed_root)
 {
 	struct list_head *p;
@@ -221,7 +221,7 @@ out:
 	return node;
 }
 
-struct btrfs_delayed_node *btrfs_next_delayed_node(
+static struct btrfs_delayed_node *btrfs_next_delayed_node(
 						struct btrfs_delayed_node *node)
 {
 	struct btrfs_delayed_root *delayed_root;
@@ -282,7 +282,7 @@ static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
 	__btrfs_release_delayed_node(node, 0);
 }
 
-struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
+static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
 					struct btrfs_delayed_root *delayed_root)
 {
 	struct list_head *p;
@@ -308,7 +308,7 @@ static inline void btrfs_release_prepared_delayed_node(
 	__btrfs_release_delayed_node(node, 1);
 }
 
-struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
+static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
 {
 	struct btrfs_delayed_item *item;
 	item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
@@ -383,7 +383,7 @@ static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
 	return NULL;
 }
 
-struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
+static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
 					struct btrfs_delayed_node *delayed_node,
 					struct btrfs_key *key)
 {
@@ -394,45 +394,6 @@ struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
 	return item;
 }
 
-struct btrfs_delayed_item *__btrfs_lookup_delayed_deletion_item(
-					struct btrfs_delayed_node *delayed_node,
-					struct btrfs_key *key)
-{
-	struct btrfs_delayed_item *item;
-
-	item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
-					   NULL, NULL);
-	return item;
-}
-
-struct btrfs_delayed_item *__btrfs_search_delayed_insertion_item(
-					struct btrfs_delayed_node *delayed_node,
-					struct btrfs_key *key)
-{
-	struct btrfs_delayed_item *item, *next;
-
-	item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
-					   NULL, &next);
-	if (!item)
-		item = next;
-
-	return item;
-}
-
-struct btrfs_delayed_item *__btrfs_search_delayed_deletion_item(
-					struct btrfs_delayed_node *delayed_node,
-					struct btrfs_key *key)
-{
-	struct btrfs_delayed_item *item, *next;
-
-	item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
-					   NULL, &next);
-	if (!item)
-		item = next;
-
-	return item;
-}
-
 static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
 				    struct btrfs_delayed_item *ins,
 				    int action)
@@ -535,7 +496,7 @@ static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
 	}
 }
 
-struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
+static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
 					struct btrfs_delayed_node *delayed_node)
 {
 	struct rb_node *p;
@@ -548,7 +509,7 @@ struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
 	return item;
 }
 
-struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
+static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
 					struct btrfs_delayed_node *delayed_node)
 {
 	struct rb_node *p;
@@ -561,7 +522,7 @@ struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
 	return item;
 }
 
-struct btrfs_delayed_item *__btrfs_next_delayed_item(
+static struct btrfs_delayed_item *__btrfs_next_delayed_item(
 						struct btrfs_delayed_item *item)
 {
 	struct rb_node *p;
@@ -766,10 +727,9 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
  * This helper will insert some continuous items into the same leaf according
  * to the free space of the leaf.
  */
-static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root,
-				struct btrfs_path *path,
-				struct btrfs_delayed_item *item)
+static int btrfs_batch_insert_items(struct btrfs_root *root,
+				    struct btrfs_path *path,
+				    struct btrfs_delayed_item *item)
 {
 	struct btrfs_delayed_item *curr, *next;
 	int free_space;
@@ -848,7 +808,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
 	btrfs_clear_path_blocking(path, NULL, 0);
 
 	/* insert the keys of the items */
-	setup_items_for_insert(trans, root, path, keys, data_size,
+	setup_items_for_insert(root, path, keys, data_size,
 			       total_data_size, total_size, nitems);
 
 	/* insert the dir index items */
@@ -932,7 +892,7 @@ do_again:
 	if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
 		/* insert the continuous items into the same leaf */
 		path->slots[0]++;
-		btrfs_batch_insert_items(trans, root, path, curr);
+		btrfs_batch_insert_items(root, path, curr);
 	}
 	btrfs_release_delayed_item(prev);
 	btrfs_mark_buffer_dirty(path->nodes[0]);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index b7a0641..c219463 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -40,16 +40,19 @@ struct kmem_cache *btrfs_delayed_extent_op_cachep;
  * compare two delayed tree backrefs with same bytenr and type
  */
 static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
-			  struct btrfs_delayed_tree_ref *ref1)
+			  struct btrfs_delayed_tree_ref *ref1, int type)
 {
-	if (ref1->root < ref2->root)
-		return -1;
-	if (ref1->root > ref2->root)
-		return 1;
-	if (ref1->parent < ref2->parent)
-		return -1;
-	if (ref1->parent > ref2->parent)
-		return 1;
+	if (type == BTRFS_TREE_BLOCK_REF_KEY) {
+		if (ref1->root < ref2->root)
+			return -1;
+		if (ref1->root > ref2->root)
+			return 1;
+	} else {
+		if (ref1->parent < ref2->parent)
+			return -1;
+		if (ref1->parent > ref2->parent)
+			return 1;
+	}
 	return 0;
 }
 
@@ -113,7 +116,8 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
 	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
 	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
 		return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
-				      btrfs_delayed_node_to_tree_ref(ref1));
+				      btrfs_delayed_node_to_tree_ref(ref1),
+				      ref1->type);
 	} else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
 		   ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
 		return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
@@ -357,8 +361,10 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
 		elem = list_first_entry(&fs_info->tree_mod_seq_list,
 					struct seq_list, list);
 		if (seq >= elem->seq) {
-			pr_debug("holding back delayed_ref %llu, lowest is "
-				 "%llu (%p)\n", seq, elem->seq, delayed_refs);
+			pr_debug("holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)\n",
+				 (u32)(seq >> 32), (u32)seq,
+				 (u32)(elem->seq >> 32), (u32)elem->seq,
+				 delayed_refs);
 			ret = 1;
 		}
 	}
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 502c215..79e594e 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -21,6 +21,10 @@
 #include "hash.h"
 #include "transaction.h"
 
+static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+			      struct btrfs_path *path,
+			      const char *name, int name_len);
+
 /*
  * insert a name into a directory, doing overflow properly if there is a hash
  * collision.  data_size indicates how big the item inserted should be.  On
@@ -49,7 +53,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
 		di = btrfs_match_dir_item_name(root, path, name, name_len);
 		if (di)
 			return ERR_PTR(-EEXIST);
-		btrfs_extend_item(trans, root, path, data_size);
+		btrfs_extend_item(root, path, data_size);
 	} else if (ret < 0)
 		return ERR_PTR(ret);
 	WARN_ON(ret > 0);
@@ -379,7 +383,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
  * this walks through all the entries in a dir item and finds one
  * for a specific name.
  */
-struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
 			      struct btrfs_path *path,
 			      const char *name, int name_len)
 {
@@ -442,8 +446,7 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
 		start = btrfs_item_ptr_offset(leaf, path->slots[0]);
 		memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
 			item_len - (ptr + sub_item_len - start));
-		btrfs_truncate_item(trans, root, path,
-				    item_len - sub_item_len, 1);
+		btrfs_truncate_item(root, path, item_len - sub_item_len, 1);
 	}
 	return ret;
 }
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6d19a0a..4e9ebe1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -30,6 +30,7 @@
 #include <linux/slab.h>
 #include <linux/migrate.h>
 #include <linux/ratelimit.h>
+#include <linux/uuid.h>
 #include <asm/unaligned.h>
 #include "compat.h"
 #include "ctree.h"
@@ -69,6 +70,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
 					int mark);
 static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
 				       struct extent_io_tree *pinned_extents);
+static int btrfs_cleanup_transaction(struct btrfs_root *root);
+static void btrfs_error_commit_super(struct btrfs_root *root);
 
 /*
  * end_io_wq structs are used to do processing in task context when an IO is
@@ -222,7 +225,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
 	em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
 	write_lock(&em_tree->lock);
-	ret = add_extent_mapping(em_tree, em);
+	ret = add_extent_mapping(em_tree, em, 0);
 	if (ret == -EEXIST) {
 		free_extent_map(em);
 		em = lookup_extent_mapping(em_tree, start, len);
@@ -238,7 +241,7 @@ out:
 	return em;
 }
 
-u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
+u32 btrfs_csum_data(char *data, u32 seed, size_t len)
 {
 	return crc32c(seed, data, len);
 }
@@ -274,7 +277,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
 		if (err)
 			return 1;
 		cur_len = min(len, map_len - (offset - map_start));
-		crc = btrfs_csum_data(root, kaddr + offset - map_start,
+		crc = btrfs_csum_data(kaddr + offset - map_start,
 				      crc, cur_len);
 		len -= cur_len;
 		offset += cur_len;
@@ -354,6 +357,49 @@ out:
 }
 
 /*
+ * Return 0 if the superblock checksum type matches the checksum value of that
+ * algorithm. Pass the raw disk superblock data.
+ */
+static int btrfs_check_super_csum(char *raw_disk_sb)
+{
+	struct btrfs_super_block *disk_sb =
+		(struct btrfs_super_block *)raw_disk_sb;
+	u16 csum_type = btrfs_super_csum_type(disk_sb);
+	int ret = 0;
+
+	if (csum_type == BTRFS_CSUM_TYPE_CRC32) {
+		u32 crc = ~(u32)0;
+		const int csum_size = sizeof(crc);
+		char result[csum_size];
+
+		/*
+		 * The super_block structure does not span the whole
+		 * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space
+		 * is filled with zeros and is included in the checkum.
+		 */
+		crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE,
+				crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
+		btrfs_csum_final(crc, result);
+
+		if (memcmp(raw_disk_sb, result, csum_size))
+			ret = 1;
+
+		if (ret && btrfs_super_generation(disk_sb) < 10) {
+			printk(KERN_WARNING "btrfs: super block crcs don't match, older mkfs detected\n");
+			ret = 0;
+		}
+	}
+
+	if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
+		printk(KERN_ERR "btrfs: unsupported checksum algorithm %u\n",
+				csum_type);
+		ret = 1;
+	}
+
+	return ret;
+}
+
+/*
  * helper to read a given tree block, doing retries as required when
  * the checksums don't match and we have alternate mirrors to try.
  */
@@ -530,41 +576,6 @@ static noinline int check_leaf(struct btrfs_root *root,
 	return 0;
 }
 
-struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree,
-				       struct page *page, int max_walk)
-{
-	struct extent_buffer *eb;
-	u64 start = page_offset(page);
-	u64 target = start;
-	u64 min_start;
-
-	if (start < max_walk)
-		min_start = 0;
-	else
-		min_start = start - max_walk;
-
-	while (start >= min_start) {
-		eb = find_extent_buffer(tree, start, 0);
-		if (eb) {
-			/*
-			 * we found an extent buffer and it contains our page
-			 * horray!
-			 */
-			if (eb->start <= target &&
-			    eb->start + eb->len > target)
-				return eb;
-
-			/* we found an extent buffer that wasn't for us */
-			free_extent_buffer(eb);
-			return NULL;
-		}
-		if (start == 0)
-			break;
-		start -= PAGE_CACHE_SIZE;
-	}
-	return NULL;
-}
-
 static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 			       struct extent_state *state, int mirror)
 {
@@ -613,6 +624,12 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 		goto err;
 	}
 	found_level = btrfs_header_level(eb);
+	if (found_level >= BTRFS_MAX_LEVEL) {
+		btrfs_info(root->fs_info, "bad tree block level %d\n",
+			   (int)btrfs_header_level(eb));
+		ret = -EIO;
+		goto err;
+	}
 
 	btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
 				       eb, found_level);
@@ -636,10 +653,9 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 	if (!ret)
 		set_extent_buffer_uptodate(eb);
 err:
-	if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
-		clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
+	if (reads_done &&
+	    test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
 		btree_readahead_hook(root, eb, eb->start, ret);
-	}
 
 	if (ret) {
 		/*
@@ -993,14 +1009,8 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags)
 {
 	if (PageWriteback(page) || PageDirty(page))
 		return 0;
-	/*
-	 * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing
-	 * slab allocation from alloc_extent_state down the callchain where
-	 * it'd hit a BUG_ON as those flags are not allowed.
-	 */
-	gfp_flags &= ~GFP_SLAB_BUG_MASK;
 
-	return try_release_extent_buffer(page, gfp_flags);
+	return try_release_extent_buffer(page);
 }
 
 static void btree_invalidatepage(struct page *page, unsigned long offset)
@@ -1275,6 +1285,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
 	struct btrfs_key key;
 	int ret = 0;
 	u64 bytenr;
+	uuid_le uuid;
 
 	root = btrfs_alloc_root(fs_info);
 	if (!root)
@@ -1324,6 +1335,8 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
 	btrfs_set_root_used(&root->root_item, leaf->len);
 	btrfs_set_root_last_snapshot(&root->root_item, 0);
 	btrfs_set_root_dirid(&root->root_item, 0);
+	uuid_le_gen(&uuid);
+	memcpy(root->root_item.uuid, uuid.b, BTRFS_UUID_SIZE);
 	root->root_item.drop_level = 0;
 
 	key.objectid = objectid;
@@ -1476,7 +1489,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
 	if (ret == 0) {
 		l = path->nodes[0];
 		slot = path->slots[0];
-		btrfs_read_root_item(tree_root, l, slot, &root->root_item);
+		btrfs_read_root_item(l, slot, &root->root_item);
 		memcpy(&root->root_key, location, sizeof(*location));
 	}
 	btrfs_free_path(path);
@@ -1491,6 +1504,14 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
 	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
 	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
 				     blocksize, generation);
+	if (!root->node || !extent_buffer_uptodate(root->node)) {
+		ret = (!root->node) ? -ENOMEM : -EIO;
+
+		free_extent_buffer(root->node);
+		kfree(root);
+		return ERR_PTR(ret);
+	}
+
 	root->commit_root = btrfs_root_node(root);
 	BUG_ON(!root->node); /* -ENOMEM */
 out:
@@ -1658,15 +1679,20 @@ static int cleaner_kthread(void *arg)
 	struct btrfs_root *root = arg;
 
 	do {
+		int again = 0;
+
 		if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
-		    mutex_trylock(&root->fs_info->cleaner_mutex)) {
-			btrfs_run_delayed_iputs(root);
-			btrfs_clean_old_snapshots(root);
-			mutex_unlock(&root->fs_info->cleaner_mutex);
+		    down_read_trylock(&root->fs_info->sb->s_umount)) {
+			if (mutex_trylock(&root->fs_info->cleaner_mutex)) {
+				btrfs_run_delayed_iputs(root);
+				again = btrfs_clean_one_deleted_snapshot(root);
+				mutex_unlock(&root->fs_info->cleaner_mutex);
+			}
 			btrfs_run_defrag_inodes(root->fs_info);
+			up_read(&root->fs_info->sb->s_umount);
 		}
 
-		if (!try_to_freeze()) {
+		if (!try_to_freeze() && !again) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			if (!kthread_should_stop())
 				schedule();
@@ -1935,6 +1961,28 @@ static noinline int next_root_backup(struct btrfs_fs_info *info,
 	return 0;
 }
 
+/* helper to cleanup workers */
+static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
+{
+	btrfs_stop_workers(&fs_info->generic_worker);
+	btrfs_stop_workers(&fs_info->fixup_workers);
+	btrfs_stop_workers(&fs_info->delalloc_workers);
+	btrfs_stop_workers(&fs_info->workers);
+	btrfs_stop_workers(&fs_info->endio_workers);
+	btrfs_stop_workers(&fs_info->endio_meta_workers);
+	btrfs_stop_workers(&fs_info->endio_raid56_workers);
+	btrfs_stop_workers(&fs_info->rmw_workers);
+	btrfs_stop_workers(&fs_info->endio_meta_write_workers);
+	btrfs_stop_workers(&fs_info->endio_write_workers);
+	btrfs_stop_workers(&fs_info->endio_freespace_worker);
+	btrfs_stop_workers(&fs_info->submit_workers);
+	btrfs_stop_workers(&fs_info->delayed_workers);
+	btrfs_stop_workers(&fs_info->caching_workers);
+	btrfs_stop_workers(&fs_info->readahead_workers);
+	btrfs_stop_workers(&fs_info->flush_workers);
+	btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
+}
+
 /* helper to cleanup tree roots */
 static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
 {
@@ -1972,6 +2020,36 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
 	}
 }
 
+static void del_fs_roots(struct btrfs_fs_info *fs_info)
+{
+	int ret;
+	struct btrfs_root *gang[8];
+	int i;
+
+	while (!list_empty(&fs_info->dead_roots)) {
+		gang[0] = list_entry(fs_info->dead_roots.next,
+				     struct btrfs_root, root_list);
+		list_del(&gang[0]->root_list);
+
+		if (gang[0]->in_radix) {
+			btrfs_free_fs_root(fs_info, gang[0]);
+		} else {
+			free_extent_buffer(gang[0]->node);
+			free_extent_buffer(gang[0]->commit_root);
+			kfree(gang[0]);
+		}
+	}
+
+	while (1) {
+		ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+					     (void **)gang, 0,
+					     ARRAY_SIZE(gang));
+		if (!ret)
+			break;
+		for (i = 0; i < ret; i++)
+			btrfs_free_fs_root(fs_info, gang[i]);
+	}
+}
 
 int open_ctree(struct super_block *sb,
 	       struct btrfs_fs_devices *fs_devices,
@@ -2060,6 +2138,7 @@ int open_ctree(struct super_block *sb,
 	spin_lock_init(&fs_info->defrag_inodes_lock);
 	spin_lock_init(&fs_info->free_chunk_lock);
 	spin_lock_init(&fs_info->tree_mod_seq_lock);
+	spin_lock_init(&fs_info->super_lock);
 	rwlock_init(&fs_info->tree_mod_log_lock);
 	mutex_init(&fs_info->reloc_mutex);
 	seqlock_init(&fs_info->profiles_lock);
@@ -2083,7 +2162,7 @@ int open_ctree(struct super_block *sb,
 	atomic_set(&fs_info->async_submit_draining, 0);
 	atomic_set(&fs_info->nr_async_bios, 0);
 	atomic_set(&fs_info->defrag_running, 0);
-	atomic_set(&fs_info->tree_mod_seq, 0);
+	atomic64_set(&fs_info->tree_mod_seq, 0);
 	fs_info->sb = sb;
 	fs_info->max_inline = 8192 * 1024;
 	fs_info->metadata_ratio = 0;
@@ -2187,11 +2266,13 @@ int open_ctree(struct super_block *sb,
 	mutex_init(&fs_info->dev_replace.lock);
 
 	spin_lock_init(&fs_info->qgroup_lock);
+	mutex_init(&fs_info->qgroup_ioctl_lock);
 	fs_info->qgroup_tree = RB_ROOT;
 	INIT_LIST_HEAD(&fs_info->dirty_qgroups);
 	fs_info->qgroup_seq = 1;
 	fs_info->quota_enabled = 0;
 	fs_info->pending_quota_state = 0;
+	mutex_init(&fs_info->qgroup_rescan_lock);
 
 	btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
 	btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -2211,12 +2292,31 @@ int open_ctree(struct super_block *sb,
 		     fs_info, BTRFS_ROOT_TREE_OBJECTID);
 
 	invalidate_bdev(fs_devices->latest_bdev);
+
+	/*
+	 * Read super block and check the signature bytes only
+	 */
 	bh = btrfs_read_dev_super(fs_devices->latest_bdev);
 	if (!bh) {
 		err = -EINVAL;
 		goto fail_alloc;
 	}
 
+	/*
+	 * We want to check superblock checksum, the type is stored inside.
+	 * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
+	 */
+	if (btrfs_check_super_csum(bh->b_data)) {
+		printk(KERN_ERR "btrfs: superblock checksum mismatch\n");
+		err = -EINVAL;
+		goto fail_alloc;
+	}
+
+	/*
+	 * super_copy is zeroed at allocation time and we never touch the
+	 * following bytes up to INFO_SIZE, the checksum is calculated from
+	 * the whole block of INFO_SIZE
+	 */
 	memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy));
 	memcpy(fs_info->super_for_commit, fs_info->super_copy,
 	       sizeof(*fs_info->super_for_commit));
@@ -2224,6 +2324,13 @@ int open_ctree(struct super_block *sb,
 
 	memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
 
+	ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
+	if (ret) {
+		printk(KERN_ERR "btrfs: superblock contains fatal errors\n");
+		err = -EINVAL;
+		goto fail_alloc;
+	}
+
 	disk_super = fs_info->super_copy;
 	if (!btrfs_super_root(disk_super))
 		goto fail_alloc;
@@ -2232,13 +2339,6 @@ int open_ctree(struct super_block *sb,
 	if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
 		set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
 
-	ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
-	if (ret) {
-		printk(KERN_ERR "btrfs: superblock contains fatal errors\n");
-		err = ret;
-		goto fail_alloc;
-	}
-
 	/*
 	 * run through our array of backup supers and setup
 	 * our ring pointer to the oldest one
@@ -2290,6 +2390,9 @@ int open_ctree(struct super_block *sb,
 	if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
 		features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
 
+	if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
+		printk(KERN_ERR "btrfs: has skinny extents\n");
+
 	/*
 	 * flag our filesystem as having big metadata blocks if
 	 * they are bigger than the page size
@@ -2319,6 +2422,10 @@ int open_ctree(struct super_block *sb,
 		goto fail_alloc;
 	}
 
+	/*
+	 * Needn't use the lock because there is no other task which will
+	 * update the flag.
+	 */
 	btrfs_set_super_incompat_flags(disk_super, features);
 
 	features = btrfs_super_compat_ro_flags(disk_super) &
@@ -2394,6 +2501,8 @@ int open_ctree(struct super_block *sb,
 	btrfs_init_workers(&fs_info->readahead_workers, "readahead",
 			   fs_info->thread_pool_size,
 			   &fs_info->generic_worker);
+	btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1,
+			   &fs_info->generic_worker);
 
 	/*
 	 * endios are largely parallel and should have a very
@@ -2428,6 +2537,7 @@ int open_ctree(struct super_block *sb,
 	ret |= btrfs_start_workers(&fs_info->caching_workers);
 	ret |= btrfs_start_workers(&fs_info->readahead_workers);
 	ret |= btrfs_start_workers(&fs_info->flush_workers);
+	ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers);
 	if (ret) {
 		err = -ENOMEM;
 		goto fail_sb_buffer;
@@ -2475,8 +2585,8 @@ int open_ctree(struct super_block *sb,
 	chunk_root->node = read_tree_block(chunk_root,
 					   btrfs_super_chunk_root(disk_super),
 					   blocksize, generation);
-	BUG_ON(!chunk_root->node); /* -ENOMEM */
-	if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
+	if (!chunk_root->node ||
+	    !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
 		printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
 		       sb->s_id);
 		goto fail_tree_roots;
@@ -2661,6 +2771,13 @@ retry_root_backup:
 		log_tree_root->node = read_tree_block(tree_root, bytenr,
 						      blocksize,
 						      generation + 1);
+		if (!log_tree_root->node ||
+		    !extent_buffer_uptodate(log_tree_root->node)) {
+			printk(KERN_ERR "btrfs: failed to read log tree\n");
+			free_extent_buffer(log_tree_root->node);
+			kfree(log_tree_root);
+			goto fail_trans_kthread;
+		}
 		/* returns with log_tree_root freed on success */
 		ret = btrfs_recover_log_trees(log_tree_root);
 		if (ret) {
@@ -2740,6 +2857,8 @@ fail_qgroup:
 	btrfs_free_qgroup_config(fs_info);
 fail_trans_kthread:
 	kthread_stop(fs_info->transaction_kthread);
+	del_fs_roots(fs_info);
+	btrfs_cleanup_transaction(fs_info->tree_root);
 fail_cleaner:
 	kthread_stop(fs_info->cleaner_kthread);
 
@@ -2750,6 +2869,7 @@ fail_cleaner:
 	filemap_write_and_wait(fs_info->btree_inode->i_mapping);
 
 fail_block_groups:
+	btrfs_put_block_group_cache(fs_info);
 	btrfs_free_block_groups(fs_info);
 
 fail_tree_roots:
@@ -2757,22 +2877,7 @@ fail_tree_roots:
 	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
 
 fail_sb_buffer:
-	btrfs_stop_workers(&fs_info->generic_worker);
-	btrfs_stop_workers(&fs_info->readahead_workers);
-	btrfs_stop_workers(&fs_info->fixup_workers);
-	btrfs_stop_workers(&fs_info->delalloc_workers);
-	btrfs_stop_workers(&fs_info->workers);
-	btrfs_stop_workers(&fs_info->endio_workers);
-	btrfs_stop_workers(&fs_info->endio_meta_workers);
-	btrfs_stop_workers(&fs_info->endio_raid56_workers);
-	btrfs_stop_workers(&fs_info->rmw_workers);
-	btrfs_stop_workers(&fs_info->endio_meta_write_workers);
-	btrfs_stop_workers(&fs_info->endio_write_workers);
-	btrfs_stop_workers(&fs_info->endio_freespace_worker);
-	btrfs_stop_workers(&fs_info->submit_workers);
-	btrfs_stop_workers(&fs_info->delayed_workers);
-	btrfs_stop_workers(&fs_info->caching_workers);
-	btrfs_stop_workers(&fs_info->flush_workers);
+	btrfs_stop_all_workers(fs_info);
 fail_alloc:
 fail_iput:
 	btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -2904,7 +3009,10 @@ static int write_dev_supers(struct btrfs_device *device,
 		if (wait) {
 			bh = __find_get_block(device->bdev, bytenr / 4096,
 					      BTRFS_SUPER_INFO_SIZE);
-			BUG_ON(!bh);
+			if (!bh) {
+				errors++;
+				continue;
+			}
 			wait_on_buffer(bh);
 			if (!buffer_uptodate(bh))
 				errors++;
@@ -2919,7 +3027,7 @@ static int write_dev_supers(struct btrfs_device *device,
 			btrfs_set_super_bytenr(sb, bytenr);
 
 			crc = ~(u32)0;
-			crc = btrfs_csum_data(NULL, (char *)sb +
+			crc = btrfs_csum_data((char *)sb +
 					      BTRFS_CSUM_SIZE, crc,
 					      BTRFS_SUPER_INFO_SIZE -
 					      BTRFS_CSUM_SIZE);
@@ -2931,6 +3039,13 @@ static int write_dev_supers(struct btrfs_device *device,
 			 */
 			bh = __getblk(device->bdev, bytenr / 4096,
 				      BTRFS_SUPER_INFO_SIZE);
+			if (!bh) {
+				printk(KERN_ERR "btrfs: couldn't get super "
+				       "buffer head for bytenr %Lu\n", bytenr);
+				errors++;
+				continue;
+			}
+
 			memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
 
 			/* one reference for submit_bh */
@@ -3153,7 +3268,7 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
 	return num_tolerated_disk_barrier_failures;
 }
 
-int write_all_supers(struct btrfs_root *root, int max_mirrors)
+static int write_all_supers(struct btrfs_root *root, int max_mirrors)
 {
 	struct list_head *head;
 	struct btrfs_device *dev;
@@ -3283,37 +3398,6 @@ static void free_fs_root(struct btrfs_root *root)
 	kfree(root);
 }
 
-static void del_fs_roots(struct btrfs_fs_info *fs_info)
-{
-	int ret;
-	struct btrfs_root *gang[8];
-	int i;
-
-	while (!list_empty(&fs_info->dead_roots)) {
-		gang[0] = list_entry(fs_info->dead_roots.next,
-				     struct btrfs_root, root_list);
-		list_del(&gang[0]->root_list);
-
-		if (gang[0]->in_radix) {
-			btrfs_free_fs_root(fs_info, gang[0]);
-		} else {
-			free_extent_buffer(gang[0]->node);
-			free_extent_buffer(gang[0]->commit_root);
-			kfree(gang[0]);
-		}
-	}
-
-	while (1) {
-		ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
-					     (void **)gang, 0,
-					     ARRAY_SIZE(gang));
-		if (!ret)
-			break;
-		for (i = 0; i < ret; i++)
-			btrfs_free_fs_root(fs_info, gang[i]);
-	}
-}
-
 int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
 {
 	u64 root_objectid = 0;
@@ -3349,8 +3433,8 @@ int btrfs_commit_super(struct btrfs_root *root)
 
 	mutex_lock(&root->fs_info->cleaner_mutex);
 	btrfs_run_delayed_iputs(root);
-	btrfs_clean_old_snapshots(root);
 	mutex_unlock(&root->fs_info->cleaner_mutex);
+	wake_up_process(root->fs_info->cleaner_kthread);
 
 	/* wait until ongoing cleanup work done */
 	down_write(&root->fs_info->cleanup_work_sem);
@@ -3426,20 +3510,7 @@ int close_ctree(struct btrfs_root *root)
 		       percpu_counter_sum(&fs_info->delalloc_bytes));
 	}
 
-	free_extent_buffer(fs_info->extent_root->node);
-	free_extent_buffer(fs_info->extent_root->commit_root);
-	free_extent_buffer(fs_info->tree_root->node);
-	free_extent_buffer(fs_info->tree_root->commit_root);
-	free_extent_buffer(fs_info->chunk_root->node);
-	free_extent_buffer(fs_info->chunk_root->commit_root);
-	free_extent_buffer(fs_info->dev_root->node);
-	free_extent_buffer(fs_info->dev_root->commit_root);
-	free_extent_buffer(fs_info->csum_root->node);
-	free_extent_buffer(fs_info->csum_root->commit_root);
-	if (fs_info->quota_root) {
-		free_extent_buffer(fs_info->quota_root->node);
-		free_extent_buffer(fs_info->quota_root->commit_root);
-	}
+	free_root_pointers(fs_info, 1);
 
 	btrfs_free_block_groups(fs_info);
 
@@ -3447,22 +3518,7 @@ int close_ctree(struct btrfs_root *root)
 
 	iput(fs_info->btree_inode);
 
-	btrfs_stop_workers(&fs_info->generic_worker);
-	btrfs_stop_workers(&fs_info->fixup_workers);
-	btrfs_stop_workers(&fs_info->delalloc_workers);
-	btrfs_stop_workers(&fs_info->workers);
-	btrfs_stop_workers(&fs_info->endio_workers);
-	btrfs_stop_workers(&fs_info->endio_meta_workers);
-	btrfs_stop_workers(&fs_info->endio_raid56_workers);
-	btrfs_stop_workers(&fs_info->rmw_workers);
-	btrfs_stop_workers(&fs_info->endio_meta_write_workers);
-	btrfs_stop_workers(&fs_info->endio_write_workers);
-	btrfs_stop_workers(&fs_info->endio_freespace_worker);
-	btrfs_stop_workers(&fs_info->submit_workers);
-	btrfs_stop_workers(&fs_info->delayed_workers);
-	btrfs_stop_workers(&fs_info->caching_workers);
-	btrfs_stop_workers(&fs_info->readahead_workers);
-	btrfs_stop_workers(&fs_info->flush_workers);
+	btrfs_stop_all_workers(fs_info);
 
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
 	if (btrfs_test_opt(root, CHECK_INTEGRITY))
@@ -3567,18 +3623,13 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
 static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 			      int read_only)
 {
-	if (btrfs_super_csum_type(fs_info->super_copy) >= ARRAY_SIZE(btrfs_csum_sizes)) {
-		printk(KERN_ERR "btrfs: unsupported checksum algorithm\n");
-		return -EINVAL;
-	}
-
-	if (read_only)
-		return 0;
-
+	/*
+	 * Placeholder for checks
+	 */
 	return 0;
 }
 
-void btrfs_error_commit_super(struct btrfs_root *root)
+static void btrfs_error_commit_super(struct btrfs_root *root)
 {
 	mutex_lock(&root->fs_info->cleaner_mutex);
 	btrfs_run_delayed_iputs(root);
@@ -3669,6 +3720,9 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 				continue;
 			}
 
+			if (head->must_insert_reserved)
+				btrfs_pin_extent(root, ref->bytenr,
+						 ref->num_bytes, 1);
 			btrfs_free_delayed_extent_op(head->extent_op);
 			delayed_refs->num_heads--;
 			if (list_empty(&head->cluster))
@@ -3740,13 +3794,9 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
 					int mark)
 {
 	int ret;
-	struct page *page;
-	struct inode *btree_inode = root->fs_info->btree_inode;
 	struct extent_buffer *eb;
 	u64 start = 0;
 	u64 end;
-	u64 offset;
-	unsigned long index;
 
 	while (1) {
 		ret = find_first_extent_bit(dirty_pages, start, &start, &end,
@@ -3756,36 +3806,17 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
 
 		clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
 		while (start <= end) {
-			index = start >> PAGE_CACHE_SHIFT;
-			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
-			page = find_get_page(btree_inode->i_mapping, index);
-			if (!page)
+			eb = btrfs_find_tree_block(root, start,
+						   root->leafsize);
+			start += eb->len;
+			if (!eb)
 				continue;
-			offset = page_offset(page);
-
-			spin_lock(&dirty_pages->buffer_lock);
-			eb = radix_tree_lookup(
-			     &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
-					       offset >> PAGE_CACHE_SHIFT);
-			spin_unlock(&dirty_pages->buffer_lock);
-			if (eb)
-				ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
-							 &eb->bflags);
-			if (PageWriteback(page))
-				end_page_writeback(page);
-
-			lock_page(page);
-			if (PageDirty(page)) {
-				clear_page_dirty_for_io(page);
-				spin_lock_irq(&page->mapping->tree_lock);
-				radix_tree_tag_clear(&page->mapping->page_tree,
-							page_index(page),
-							PAGECACHE_TAG_DIRTY);
-				spin_unlock_irq(&page->mapping->tree_lock);
-			}
+			wait_on_extent_buffer_writeback(eb);
 
-			unlock_page(page);
-			page_cache_release(page);
+			if (test_and_clear_bit(EXTENT_BUFFER_DIRTY,
+					       &eb->bflags))
+				clear_extent_buffer_dirty(eb);
+			free_extent_buffer_stale(eb);
 		}
 	}
 
@@ -3866,7 +3897,7 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
 	*/
 }
 
-int btrfs_cleanup_transaction(struct btrfs_root *root)
+static int btrfs_cleanup_transaction(struct btrfs_root *root)
 {
 	struct btrfs_transaction *t;
 	LIST_HEAD(list);
@@ -3887,10 +3918,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
 
 		btrfs_destroy_delayed_refs(t, root);
 
-		btrfs_block_rsv_release(root,
-					&root->fs_info->trans_block_rsv,
-					t->dirty_pages.dirty_bytes);
-
 		/* FIXME: cleanup wait for commit */
 		t->in_commit = 1;
 		t->blocked = 1;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 034d7dc..be69ce1 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -61,7 +61,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root, int max_mirrors);
 struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
 int btrfs_commit_super(struct btrfs_root *root);
-void btrfs_error_commit_super(struct btrfs_root *root);
 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
 					    u64 bytenr, u32 blocksize);
 struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
@@ -77,7 +76,7 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
 			  int atomic);
 int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
 int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
-u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len);
+u32 btrfs_csum_data(char *data, u32 seed, size_t len);
 void btrfs_csum_final(u32 crc, char *result);
 int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
 			int metadata);
@@ -93,10 +92,8 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
 			     struct btrfs_fs_info *fs_info);
 int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root);
-int btrfs_cleanup_transaction(struct btrfs_root *root);
 void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
 				  struct btrfs_root *root);
-void btrfs_abort_devices(struct btrfs_root *root);
 struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
 				     struct btrfs_fs_info *fs_info,
 				     u64 objectid);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3d55123..2305b5c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -105,6 +105,8 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
 				       u64 num_bytes, int reserve);
 static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
 			       u64 num_bytes);
+int btrfs_pin_extent(struct btrfs_root *root,
+		     u64 bytenr, u64 num_bytes, int reserved);
 
 static noinline int
 block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -270,9 +272,27 @@ static int exclude_super_stripes(struct btrfs_root *root,
 			return ret;
 
 		while (nr--) {
-			cache->bytes_super += stripe_len;
-			ret = add_excluded_extent(root, logical[nr],
-						  stripe_len);
+			u64 start, len;
+
+			if (logical[nr] > cache->key.objectid +
+			    cache->key.offset)
+				continue;
+
+			if (logical[nr] + stripe_len <= cache->key.objectid)
+				continue;
+
+			start = logical[nr];
+			if (start < cache->key.objectid) {
+				start = cache->key.objectid;
+				len = (logical[nr] + stripe_len) - start;
+			} else {
+				len = min_t(u64, stripe_len,
+					    cache->key.objectid +
+					    cache->key.offset - start);
+			}
+
+			cache->bytes_super += len;
+			ret = add_excluded_extent(root, start, len);
 			if (ret) {
 				kfree(logical);
 				return ret;
@@ -419,8 +439,7 @@ again:
 			if (ret)
 				break;
 
-			if (need_resched() ||
-			    btrfs_next_leaf(extent_root, path)) {
+			if (need_resched()) {
 				caching_ctl->progress = last;
 				btrfs_release_path(path);
 				up_read(&fs_info->extent_commit_sem);
@@ -428,6 +447,12 @@ again:
 				cond_resched();
 				goto again;
 			}
+
+			ret = btrfs_next_leaf(extent_root, path);
+			if (ret < 0)
+				goto err;
+			if (ret)
+				break;
 			leaf = path->nodes[0];
 			nritems = btrfs_header_nritems(leaf);
 			continue;
@@ -442,11 +467,16 @@ again:
 		    block_group->key.offset)
 			break;
 
-		if (key.type == BTRFS_EXTENT_ITEM_KEY) {
+		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
+		    key.type == BTRFS_METADATA_ITEM_KEY) {
 			total_found += add_new_free_space(block_group,
 							  fs_info, last,
 							  key.objectid);
-			last = key.objectid + key.offset;
+			if (key.type == BTRFS_METADATA_ITEM_KEY)
+				last = key.objectid +
+					fs_info->tree_root->leafsize;
+			else
+				last = key.objectid + key.offset;
 
 			if (total_found > (1024 * 1024 * 2)) {
 				total_found = 0;
@@ -656,55 +686,6 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
 	rcu_read_unlock();
 }
 
-u64 btrfs_find_block_group(struct btrfs_root *root,
-			   u64 search_start, u64 search_hint, int owner)
-{
-	struct btrfs_block_group_cache *cache;
-	u64 used;
-	u64 last = max(search_hint, search_start);
-	u64 group_start = 0;
-	int full_search = 0;
-	int factor = 9;
-	int wrapped = 0;
-again:
-	while (1) {
-		cache = btrfs_lookup_first_block_group(root->fs_info, last);
-		if (!cache)
-			break;
-
-		spin_lock(&cache->lock);
-		last = cache->key.objectid + cache->key.offset;
-		used = btrfs_block_group_used(&cache->item);
-
-		if ((full_search || !cache->ro) &&
-		    block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
-			if (used + cache->pinned + cache->reserved <
-			    div_factor(cache->key.offset, factor)) {
-				group_start = cache->key.objectid;
-				spin_unlock(&cache->lock);
-				btrfs_put_block_group(cache);
-				goto found;
-			}
-		}
-		spin_unlock(&cache->lock);
-		btrfs_put_block_group(cache);
-		cond_resched();
-	}
-	if (!wrapped) {
-		last = search_start;
-		wrapped = 1;
-		goto again;
-	}
-	if (!full_search && factor < 10) {
-		last = search_start;
-		full_search = 1;
-		factor = 10;
-		goto again;
-	}
-found:
-	return group_start;
-}
-
 /* simple helper to search for an existing extent at a given offset */
 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
 {
@@ -718,15 +699,21 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
 
 	key.objectid = start;
 	key.offset = len;
-	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+	key.type = BTRFS_EXTENT_ITEM_KEY;
 	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
 				0, 0);
+	if (ret > 0) {
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+		if (key.objectid == start &&
+		    key.type == BTRFS_METADATA_ITEM_KEY)
+			ret = 0;
+	}
 	btrfs_free_path(path);
 	return ret;
 }
 
 /*
- * helper function to lookup reference count and flags of extent.
+ * helper function to lookup reference count and flags of a tree block.
  *
  * the head node for delayed ref is used to store the sum of all the
  * reference count modifications queued up in the rbtree. the head
@@ -736,7 +723,7 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
  */
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root, u64 bytenr,
-			     u64 num_bytes, u64 *refs, u64 *flags)
+			     u64 offset, int metadata, u64 *refs, u64 *flags)
 {
 	struct btrfs_delayed_ref_head *head;
 	struct btrfs_delayed_ref_root *delayed_refs;
@@ -749,13 +736,29 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 	u64 extent_flags;
 	int ret;
 
+	/*
+	 * If we don't have skinny metadata, don't bother doing anything
+	 * different
+	 */
+	if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) {
+		offset = root->leafsize;
+		metadata = 0;
+	}
+
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
 
-	key.objectid = bytenr;
-	key.type = BTRFS_EXTENT_ITEM_KEY;
-	key.offset = num_bytes;
+	if (metadata) {
+		key.objectid = bytenr;
+		key.type = BTRFS_METADATA_ITEM_KEY;
+		key.offset = offset;
+	} else {
+		key.objectid = bytenr;
+		key.type = BTRFS_EXTENT_ITEM_KEY;
+		key.offset = offset;
+	}
+
 	if (!trans) {
 		path->skip_locking = 1;
 		path->search_commit_root = 1;
@@ -766,6 +769,13 @@ again:
 	if (ret < 0)
 		goto out_free;
 
+	if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
+		key.type = BTRFS_EXTENT_ITEM_KEY;
+		key.offset = root->leafsize;
+		btrfs_release_path(path);
+		goto again;
+	}
+
 	if (ret == 0) {
 		leaf = path->nodes[0];
 		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
@@ -1001,7 +1011,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
 		return ret;
 	BUG_ON(ret); /* Corruption */
 
-	btrfs_extend_item(trans, root, path, new_size);
+	btrfs_extend_item(root, path, new_size);
 
 	leaf = path->nodes[0];
 	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -1453,6 +1463,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
 	int want;
 	int ret;
 	int err = 0;
+	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
+						 SKINNY_METADATA);
 
 	key.objectid = bytenr;
 	key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -1464,11 +1476,46 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
 		path->keep_locks = 1;
 	} else
 		extra_size = -1;
+
+	/*
+	 * Owner is our parent level, so we can just add one to get the level
+	 * for the block we are interested in.
+	 */
+	if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
+		key.type = BTRFS_METADATA_ITEM_KEY;
+		key.offset = owner;
+	}
+
+again:
 	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
 	if (ret < 0) {
 		err = ret;
 		goto out;
 	}
+
+	/*
+	 * We may be a newly converted file system which still has the old fat
+	 * extent entries for metadata, so try and see if we have one of those.
+	 */
+	if (ret > 0 && skinny_metadata) {
+		skinny_metadata = false;
+		if (path->slots[0]) {
+			path->slots[0]--;
+			btrfs_item_key_to_cpu(path->nodes[0], &key,
+					      path->slots[0]);
+			if (key.objectid == bytenr &&
+			    key.type == BTRFS_EXTENT_ITEM_KEY &&
+			    key.offset == num_bytes)
+				ret = 0;
+		}
+		if (ret) {
+			key.type = BTRFS_EXTENT_ITEM_KEY;
+			key.offset = num_bytes;
+			btrfs_release_path(path);
+			goto again;
+		}
+	}
+
 	if (ret && !insert) {
 		err = -ENOENT;
 		goto out;
@@ -1504,11 +1551,9 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
 	ptr = (unsigned long)(ei + 1);
 	end = (unsigned long)ei + item_size;
 
-	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
 		ptr += sizeof(struct btrfs_tree_block_info);
 		BUG_ON(ptr > end);
-	} else {
-		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
 	}
 
 	err = -ENOENT;
@@ -1590,8 +1635,7 @@ out:
  * helper to add new inline back ref
  */
 static noinline_for_stack
-void setup_inline_extent_backref(struct btrfs_trans_handle *trans,
-				 struct btrfs_root *root,
+void setup_inline_extent_backref(struct btrfs_root *root,
 				 struct btrfs_path *path,
 				 struct btrfs_extent_inline_ref *iref,
 				 u64 parent, u64 root_objectid,
@@ -1614,7 +1658,7 @@ void setup_inline_extent_backref(struct btrfs_trans_handle *trans,
 	type = extent_ref_type(parent, owner);
 	size = btrfs_extent_inline_ref_size(type);
 
-	btrfs_extend_item(trans, root, path, size);
+	btrfs_extend_item(root, path, size);
 
 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
 	refs = btrfs_extent_refs(leaf, ei);
@@ -1683,8 +1727,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
  * helper to update/remove inline back ref
  */
 static noinline_for_stack
-void update_inline_extent_backref(struct btrfs_trans_handle *trans,
-				  struct btrfs_root *root,
+void update_inline_extent_backref(struct btrfs_root *root,
 				  struct btrfs_path *path,
 				  struct btrfs_extent_inline_ref *iref,
 				  int refs_to_mod,
@@ -1740,7 +1783,7 @@ void update_inline_extent_backref(struct btrfs_trans_handle *trans,
 			memmove_extent_buffer(leaf, ptr, ptr + size,
 					      end - ptr - size);
 		item_size -= size;
-		btrfs_truncate_item(trans, root, path, item_size, 1);
+		btrfs_truncate_item(root, path, item_size, 1);
 	}
 	btrfs_mark_buffer_dirty(leaf);
 }
@@ -1762,10 +1805,10 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
 					   root_objectid, owner, offset, 1);
 	if (ret == 0) {
 		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
-		update_inline_extent_backref(trans, root, path, iref,
+		update_inline_extent_backref(root, path, iref,
 					     refs_to_add, extent_op);
 	} else if (ret == -ENOENT) {
-		setup_inline_extent_backref(trans, root, path, iref, parent,
+		setup_inline_extent_backref(root, path, iref, parent,
 					    root_objectid, owner, offset,
 					    refs_to_add, extent_op);
 		ret = 0;
@@ -1802,7 +1845,7 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
 
 	BUG_ON(!is_data && refs_to_drop != 1);
 	if (iref) {
-		update_inline_extent_backref(trans, root, path, iref,
+		update_inline_extent_backref(root, path, iref,
 					     -refs_to_drop, NULL);
 	} else if (is_data) {
 		ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
@@ -1973,10 +2016,8 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
 		ref_root = ref->root;
 
 	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
-		if (extent_op) {
-			BUG_ON(extent_op->update_key);
+		if (extent_op)
 			flags |= extent_op->flags_to_set;
-		}
 		ret = alloc_reserved_file_extent(trans, root,
 						 parent, ref_root, flags,
 						 ref->objectid, ref->offset,
@@ -2029,18 +2070,33 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
 	u32 item_size;
 	int ret;
 	int err = 0;
+	int metadata = (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
+			node->type == BTRFS_SHARED_BLOCK_REF_KEY);
 
 	if (trans->aborted)
 		return 0;
 
+	if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
+		metadata = 0;
+
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
 
 	key.objectid = node->bytenr;
-	key.type = BTRFS_EXTENT_ITEM_KEY;
-	key.offset = node->num_bytes;
 
+	if (metadata) {
+		struct btrfs_delayed_tree_ref *tree_ref;
+
+		tree_ref = btrfs_delayed_node_to_tree_ref(node);
+		key.type = BTRFS_METADATA_ITEM_KEY;
+		key.offset = tree_ref->level;
+	} else {
+		key.type = BTRFS_EXTENT_ITEM_KEY;
+		key.offset = node->num_bytes;
+	}
+
+again:
 	path->reada = 1;
 	path->leave_spinning = 1;
 	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
@@ -2050,6 +2106,14 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
 		goto out;
 	}
 	if (ret > 0) {
+		if (metadata) {
+			btrfs_release_path(path);
+			metadata = 0;
+
+			key.offset = node->num_bytes;
+			key.type = BTRFS_EXTENT_ITEM_KEY;
+			goto again;
+		}
 		err = -EIO;
 		goto out;
 	}
@@ -2089,10 +2153,8 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
 	struct btrfs_key ins;
 	u64 parent = 0;
 	u64 ref_root = 0;
-
-	ins.objectid = node->bytenr;
-	ins.offset = node->num_bytes;
-	ins.type = BTRFS_EXTENT_ITEM_KEY;
+	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
+						 SKINNY_METADATA);
 
 	ref = btrfs_delayed_node_to_tree_ref(node);
 	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
@@ -2100,10 +2162,18 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
 	else
 		ref_root = ref->root;
 
+	ins.objectid = node->bytenr;
+	if (skinny_metadata) {
+		ins.offset = ref->level;
+		ins.type = BTRFS_METADATA_ITEM_KEY;
+	} else {
+		ins.offset = node->num_bytes;
+		ins.type = BTRFS_EXTENT_ITEM_KEY;
+	}
+
 	BUG_ON(node->ref_mod != 1);
 	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
-		BUG_ON(!extent_op || !extent_op->update_flags ||
-		       !extent_op->update_key);
+		BUG_ON(!extent_op || !extent_op->update_flags);
 		ret = alloc_reserved_tree_block(trans, root,
 						parent, ref_root,
 						extent_op->flags_to_set,
@@ -2307,9 +2377,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 				btrfs_free_delayed_extent_op(extent_op);
 
 				if (ret) {
-					printk(KERN_DEBUG
-					       "btrfs: run_delayed_extent_op "
-					       "returned %d\n", ret);
+					btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
 					spin_lock(&delayed_refs->lock);
 					btrfs_delayed_ref_unlock(locked_ref);
 					return ret;
@@ -2348,8 +2416,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 		if (ret) {
 			btrfs_delayed_ref_unlock(locked_ref);
 			btrfs_put_delayed_ref(ref);
-			printk(KERN_DEBUG
-			       "btrfs: run_one_delayed_ref returned %d\n", ret);
+			btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret);
 			spin_lock(&delayed_refs->lock);
 			return ret;
 		}
@@ -2426,9 +2493,11 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
 	if (list_empty(&trans->qgroup_ref_list) !=
 	    !trans->delayed_ref_elem.seq) {
 		/* list without seq or seq without list */
-		printk(KERN_ERR "btrfs: qgroup accounting update error, list is%s empty, seq is %llu\n",
+		btrfs_err(fs_info,
+			"qgroup accounting update error, list is%s empty, seq is %#x.%x",
 			list_empty(&trans->qgroup_ref_list) ? "" : " not",
-			trans->delayed_ref_elem.seq);
+			(u32)(trans->delayed_ref_elem.seq >> 32),
+			(u32)trans->delayed_ref_elem.seq);
 		BUG();
 	}
 
@@ -3337,7 +3406,7 @@ static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
  * progress (either running or paused) picks the target profile (if it's
  * already available), otherwise falls back to plain reducing.
  */
-u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
+static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 {
 	/*
 	 * we add in the count of missing devices because we want
@@ -3557,6 +3626,11 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
 	rcu_read_unlock();
 }
 
+static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
+{
+	return (global->size << 1);
+}
+
 static int should_alloc_chunk(struct btrfs_root *root,
 			      struct btrfs_space_info *sinfo, int force)
 {
@@ -3574,7 +3648,7 @@ static int should_alloc_chunk(struct btrfs_root *root,
 	 * global_rsv, it doesn't change except when the transaction commits.
 	 */
 	if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
-		num_allocated += global_rsv->size;
+		num_allocated += calc_global_rsv_need_space(global_rsv);
 
 	/*
 	 * in limited mode, we want to have some free space up to
@@ -3627,8 +3701,8 @@ static void check_system_chunk(struct btrfs_trans_handle *trans,
 
 	thresh = get_system_chunk_thresh(root, type);
 	if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
-		printk(KERN_INFO "left=%llu, need=%llu, flags=%llu\n",
-		       left, thresh, type);
+		btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu",
+			left, thresh, type);
 		dump_space_info(info, 0, 0);
 	}
 
@@ -3746,7 +3820,7 @@ static int can_overcommit(struct btrfs_root *root,
 {
 	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
 	u64 profile = btrfs_get_alloc_profile(root, 0);
-	u64 rsv_size = 0;
+	u64 space_size;
 	u64 avail;
 	u64 used;
 	u64 to_add;
@@ -3754,18 +3828,16 @@ static int can_overcommit(struct btrfs_root *root,
 	used = space_info->bytes_used + space_info->bytes_reserved +
 		space_info->bytes_pinned + space_info->bytes_readonly;
 
-	spin_lock(&global_rsv->lock);
-	rsv_size = global_rsv->size;
-	spin_unlock(&global_rsv->lock);
-
 	/*
 	 * We only want to allow over committing if we have lots of actual space
 	 * free, but if we don't have enough space to handle the global reserve
 	 * space then we could end up having a real enospc problem when trying
 	 * to allocate a chunk or some other such important allocation.
 	 */
-	rsv_size <<= 1;
-	if (used + rsv_size >= space_info->total_bytes)
+	spin_lock(&global_rsv->lock);
+	space_size = calc_global_rsv_need_space(global_rsv);
+	spin_unlock(&global_rsv->lock);
+	if (used + space_size >= space_info->total_bytes)
 		return 0;
 
 	used += space_info->bytes_may_use;
@@ -3808,8 +3880,8 @@ static int can_overcommit(struct btrfs_root *root,
 	return 0;
 }
 
-void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
-				  unsigned long nr_pages)
+static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
+					 unsigned long nr_pages)
 {
 	struct super_block *sb = root->fs_info->sb;
 	int started;
@@ -3826,7 +3898,8 @@ void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
 		 * the disk).
 		 */
 		btrfs_start_delalloc_inodes(root, 0);
-		btrfs_wait_ordered_extents(root, 0);
+		if (!current->journal_info)
+			btrfs_wait_ordered_extents(root, 0);
 	}
 }
 
@@ -5090,9 +5163,11 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
 				    u64 bytenr, u64 num_bytes)
 {
 	struct btrfs_block_group_cache *cache;
+	int ret;
 
 	cache = btrfs_lookup_block_group(root->fs_info, bytenr);
-	BUG_ON(!cache); /* Logic error */
+	if (!cache)
+		return -EINVAL;
 
 	/*
 	 * pull in the free space cache (if any) so that our pin
@@ -5105,9 +5180,9 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
 	pin_down_extent(root, cache, bytenr, num_bytes, 0);
 
 	/* remove us from the free space cache (if we're there at all) */
-	btrfs_remove_free_space(cache, bytenr, num_bytes);
+	ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
 	btrfs_put_block_group(cache);
-	return 0;
+	return ret;
 }
 
 /**
@@ -5312,6 +5387,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 	int num_to_del = 1;
 	u32 item_size;
 	u64 refs;
+	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
+						 SKINNY_METADATA);
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -5323,6 +5400,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 	is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
 	BUG_ON(!is_data && refs_to_drop != 1);
 
+	if (is_data)
+		skinny_metadata = 0;
+
 	ret = lookup_extent_backref(trans, extent_root, path, &iref,
 				    bytenr, num_bytes, parent,
 				    root_objectid, owner_objectid,
@@ -5339,6 +5419,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 				found_extent = 1;
 				break;
 			}
+			if (key.type == BTRFS_METADATA_ITEM_KEY &&
+			    key.offset == owner_objectid) {
+				found_extent = 1;
+				break;
+			}
 			if (path->slots[0] - extent_slot > 5)
 				break;
 			extent_slot--;
@@ -5364,12 +5449,39 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 			key.type = BTRFS_EXTENT_ITEM_KEY;
 			key.offset = num_bytes;
 
+			if (!is_data && skinny_metadata) {
+				key.type = BTRFS_METADATA_ITEM_KEY;
+				key.offset = owner_objectid;
+			}
+
 			ret = btrfs_search_slot(trans, extent_root,
 						&key, path, -1, 1);
+			if (ret > 0 && skinny_metadata && path->slots[0]) {
+				/*
+				 * Couldn't find our skinny metadata item,
+				 * see if we have ye olde extent item.
+				 */
+				path->slots[0]--;
+				btrfs_item_key_to_cpu(path->nodes[0], &key,
+						      path->slots[0]);
+				if (key.objectid == bytenr &&
+				    key.type == BTRFS_EXTENT_ITEM_KEY &&
+				    key.offset == num_bytes)
+					ret = 0;
+			}
+
+			if (ret > 0 && skinny_metadata) {
+				skinny_metadata = false;
+				key.type = BTRFS_EXTENT_ITEM_KEY;
+				key.offset = num_bytes;
+				btrfs_release_path(path);
+				ret = btrfs_search_slot(trans, extent_root,
+							&key, path, -1, 1);
+			}
+
 			if (ret) {
-				printk(KERN_ERR "umm, got %d back from search"
-				       ", was looking for %llu\n", ret,
-				       (unsigned long long)bytenr);
+				btrfs_err(info, "umm, got %d back from search, was looking for %llu",
+					ret, (unsigned long long)bytenr);
 				if (ret > 0)
 					btrfs_print_leaf(extent_root,
 							 path->nodes[0]);
@@ -5383,13 +5495,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 	} else if (ret == -ENOENT) {
 		btrfs_print_leaf(extent_root, path->nodes[0]);
 		WARN_ON(1);
-		printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
-		       "parent %llu root %llu  owner %llu offset %llu\n",
-		       (unsigned long long)bytenr,
-		       (unsigned long long)parent,
-		       (unsigned long long)root_objectid,
-		       (unsigned long long)owner_objectid,
-		       (unsigned long long)owner_offset);
+		btrfs_err(info,
+			"unable to find ref byte nr %llu parent %llu root %llu  owner %llu offset %llu",
+			(unsigned long long)bytenr,
+			(unsigned long long)parent,
+			(unsigned long long)root_objectid,
+			(unsigned long long)owner_objectid,
+			(unsigned long long)owner_offset);
 	} else {
 		btrfs_abort_transaction(trans, extent_root, ret);
 		goto out;
@@ -5417,9 +5529,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 		ret = btrfs_search_slot(trans, extent_root, &key, path,
 					-1, 1);
 		if (ret) {
-			printk(KERN_ERR "umm, got %d back from search"
-			       ", was looking for %llu\n", ret,
-			       (unsigned long long)bytenr);
+			btrfs_err(info, "umm, got %d back from search, was looking for %llu",
+				ret, (unsigned long long)bytenr);
 			btrfs_print_leaf(extent_root, path->nodes[0]);
 		}
 		if (ret < 0) {
@@ -5435,7 +5546,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 	BUG_ON(item_size < sizeof(*ei));
 	ei = btrfs_item_ptr(leaf, extent_slot,
 			    struct btrfs_extent_item);
-	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
+	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
+	    key.type == BTRFS_EXTENT_ITEM_KEY) {
 		struct btrfs_tree_block_info *bi;
 		BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
 		bi = (struct btrfs_tree_block_info *)(ei + 1);
@@ -5443,7 +5555,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 	}
 
 	refs = btrfs_extent_refs(leaf, ei);
-	BUG_ON(refs < refs_to_drop);
+	if (refs < refs_to_drop) {
+		btrfs_err(info, "trying to drop %d refs but we only have %Lu "
+			  "for bytenr %Lu\n", refs_to_drop, refs, bytenr);
+		ret = -EINVAL;
+		btrfs_abort_transaction(trans, extent_root, ret);
+		goto out;
+	}
 	refs -= refs_to_drop;
 
 	if (refs > 0) {
@@ -5758,7 +5876,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 				     struct btrfs_root *orig_root,
 				     u64 num_bytes, u64 empty_size,
 				     u64 hint_byte, struct btrfs_key *ins,
-				     u64 data)
+				     u64 flags)
 {
 	int ret = 0;
 	struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -5769,8 +5887,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	int empty_cluster = 2 * 1024 * 1024;
 	struct btrfs_space_info *space_info;
 	int loop = 0;
-	int index = __get_raid_index(data);
-	int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
+	int index = __get_raid_index(flags);
+	int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
 		RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
 	bool found_uncached_bg = false;
 	bool failed_cluster_refill = false;
@@ -5783,11 +5901,11 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	ins->objectid = 0;
 	ins->offset = 0;
 
-	trace_find_free_extent(orig_root, num_bytes, empty_size, data);
+	trace_find_free_extent(orig_root, num_bytes, empty_size, flags);
 
-	space_info = __find_space_info(root->fs_info, data);
+	space_info = __find_space_info(root->fs_info, flags);
 	if (!space_info) {
-		printk(KERN_ERR "No space info for %llu\n", data);
+		btrfs_err(root->fs_info, "No space info for %llu", flags);
 		return -ENOSPC;
 	}
 
@@ -5798,13 +5916,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	if (btrfs_mixed_space_info(space_info))
 		use_cluster = false;
 
-	if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
+	if (flags & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
 		last_ptr = &root->fs_info->meta_alloc_cluster;
 		if (!btrfs_test_opt(root, SSD))
 			empty_cluster = 64 * 1024;
 	}
 
-	if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
+	if ((flags & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
 	    btrfs_test_opt(root, SSD)) {
 		last_ptr = &root->fs_info->data_alloc_cluster;
 	}
@@ -5833,7 +5951,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 		 * However if we are re-searching with an ideal block group
 		 * picked out then we don't care that the block group is cached.
 		 */
-		if (block_group && block_group_bits(block_group, data) &&
+		if (block_group && block_group_bits(block_group, flags) &&
 		    block_group->cached != BTRFS_CACHE_NO) {
 			down_read(&space_info->groups_sem);
 			if (list_empty(&block_group->list) ||
@@ -5871,7 +5989,7 @@ search:
 		 * raid types, but we want to make sure we only allocate
 		 * for the proper type.
 		 */
-		if (!block_group_bits(block_group, data)) {
+		if (!block_group_bits(block_group, flags)) {
 		    u64 extra = BTRFS_BLOCK_GROUP_DUP |
 				BTRFS_BLOCK_GROUP_RAID1 |
 				BTRFS_BLOCK_GROUP_RAID5 |
@@ -5883,7 +6001,7 @@ search:
 			 * doesn't provide them, bail.  This does allow us to
 			 * fill raid0 from raid1.
 			 */
-			if ((data & extra) && !(block_group->flags & extra))
+			if ((flags & extra) && !(block_group->flags & extra))
 				goto loop;
 		}
 
@@ -5914,7 +6032,7 @@ have_block_group:
 			if (used_block_group != block_group &&
 			    (!used_block_group ||
 			     used_block_group->ro ||
-			     !block_group_bits(used_block_group, data))) {
+			     !block_group_bits(used_block_group, flags))) {
 				used_block_group = block_group;
 				goto refill_cluster;
 			}
@@ -6110,7 +6228,7 @@ loop:
 		index = 0;
 		loop++;
 		if (loop == LOOP_ALLOC_CHUNK) {
-			ret = do_chunk_alloc(trans, root, data,
+			ret = do_chunk_alloc(trans, root, flags,
 					     CHUNK_ALLOC_FORCE);
 			/*
 			 * Do not bail out on ENOSPC since we
@@ -6188,16 +6306,17 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 			 struct btrfs_root *root,
 			 u64 num_bytes, u64 min_alloc_size,
 			 u64 empty_size, u64 hint_byte,
-			 struct btrfs_key *ins, u64 data)
+			 struct btrfs_key *ins, int is_data)
 {
 	bool final_tried = false;
+	u64 flags;
 	int ret;
 
-	data = btrfs_get_alloc_profile(root, data);
+	flags = btrfs_get_alloc_profile(root, is_data);
 again:
 	WARN_ON(num_bytes < root->sectorsize);
 	ret = find_free_extent(trans, root, num_bytes, empty_size,
-			       hint_byte, ins, data);
+			       hint_byte, ins, flags);
 
 	if (ret == -ENOSPC) {
 		if (!final_tried) {
@@ -6210,10 +6329,10 @@ again:
 		} else if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
 			struct btrfs_space_info *sinfo;
 
-			sinfo = __find_space_info(root->fs_info, data);
-			printk(KERN_ERR "btrfs allocation failed flags %llu, "
-			       "wanted %llu\n", (unsigned long long)data,
-			       (unsigned long long)num_bytes);
+			sinfo = __find_space_info(root->fs_info, flags);
+			btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu",
+				(unsigned long long)flags,
+				(unsigned long long)num_bytes);
 			if (sinfo)
 				dump_space_info(sinfo, num_bytes, 1);
 		}
@@ -6232,8 +6351,8 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
 
 	cache = btrfs_lookup_block_group(root->fs_info, start);
 	if (!cache) {
-		printk(KERN_ERR "Unable to find block group for %llu\n",
-		       (unsigned long long)start);
+		btrfs_err(root->fs_info, "Unable to find block group for %llu",
+			(unsigned long long)start);
 		return -ENOSPC;
 	}
 
@@ -6328,9 +6447,9 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 
 	ret = update_block_group(root, ins->objectid, ins->offset, 1);
 	if (ret) { /* -ENOENT, logic error */
-		printk(KERN_ERR "btrfs update block group failed for %llu "
-		       "%llu\n", (unsigned long long)ins->objectid,
-		       (unsigned long long)ins->offset);
+		btrfs_err(fs_info, "update block group failed for %llu %llu",
+			(unsigned long long)ins->objectid,
+			(unsigned long long)ins->offset);
 		BUG();
 	}
 	return ret;
@@ -6349,7 +6468,12 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 	struct btrfs_extent_inline_ref *iref;
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
-	u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
+	u32 size = sizeof(*extent_item) + sizeof(*iref);
+	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
+						 SKINNY_METADATA);
+
+	if (!skinny_metadata)
+		size += sizeof(*block_info);
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -6370,12 +6494,16 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
 	btrfs_set_extent_flags(leaf, extent_item,
 			       flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
-	block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
 
-	btrfs_set_tree_block_key(leaf, block_info, key);
-	btrfs_set_tree_block_level(leaf, block_info, level);
+	if (skinny_metadata) {
+		iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
+	} else {
+		block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
+		btrfs_set_tree_block_key(leaf, block_info, key);
+		btrfs_set_tree_block_level(leaf, block_info, level);
+		iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
+	}
 
-	iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
 	if (parent > 0) {
 		BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
 		btrfs_set_extent_inline_ref_type(leaf, iref,
@@ -6390,11 +6518,11 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(leaf);
 	btrfs_free_path(path);
 
-	ret = update_block_group(root, ins->objectid, ins->offset, 1);
+	ret = update_block_group(root, ins->objectid, root->leafsize, 1);
 	if (ret) { /* -ENOENT, logic error */
-		printk(KERN_ERR "btrfs update block group failed for %llu "
-		       "%llu\n", (unsigned long long)ins->objectid,
-		       (unsigned long long)ins->offset);
+		btrfs_err(fs_info, "update block group failed for %llu %llu",
+			(unsigned long long)ins->objectid,
+			(unsigned long long)ins->offset);
 		BUG();
 	}
 	return ret;
@@ -6439,47 +6567,48 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 	if (!caching_ctl) {
 		BUG_ON(!block_group_cache_done(block_group));
 		ret = btrfs_remove_free_space(block_group, start, num_bytes);
-		BUG_ON(ret); /* -ENOMEM */
+		if (ret)
+			goto out;
 	} else {
 		mutex_lock(&caching_ctl->mutex);
 
 		if (start >= caching_ctl->progress) {
 			ret = add_excluded_extent(root, start, num_bytes);
-			BUG_ON(ret); /* -ENOMEM */
 		} else if (start + num_bytes <= caching_ctl->progress) {
 			ret = btrfs_remove_free_space(block_group,
 						      start, num_bytes);
-			BUG_ON(ret); /* -ENOMEM */
 		} else {
 			num_bytes = caching_ctl->progress - start;
 			ret = btrfs_remove_free_space(block_group,
 						      start, num_bytes);
-			BUG_ON(ret); /* -ENOMEM */
+			if (ret)
+				goto out_lock;
 
 			start = caching_ctl->progress;
 			num_bytes = ins->objectid + ins->offset -
 				    caching_ctl->progress;
 			ret = add_excluded_extent(root, start, num_bytes);
-			BUG_ON(ret); /* -ENOMEM */
 		}
-
+out_lock:
 		mutex_unlock(&caching_ctl->mutex);
 		put_caching_control(caching_ctl);
+		if (ret)
+			goto out;
 	}
 
 	ret = btrfs_update_reserved_bytes(block_group, ins->offset,
 					  RESERVE_ALLOC_NO_ACCOUNT);
 	BUG_ON(ret); /* logic error */
-	btrfs_put_block_group(block_group);
 	ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
 					 0, owner, offset, ins, 1);
+out:
+	btrfs_put_block_group(block_group);
 	return ret;
 }
 
-struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
-					    struct btrfs_root *root,
-					    u64 bytenr, u32 blocksize,
-					    int level)
+static struct extent_buffer *
+btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		      u64 bytenr, u32 blocksize, int level)
 {
 	struct extent_buffer *buf;
 
@@ -6594,7 +6723,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
 	struct extent_buffer *buf;
 	u64 flags = 0;
 	int ret;
-
+	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
+						 SKINNY_METADATA);
 
 	block_rsv = use_block_rsv(trans, root, blocksize);
 	if (IS_ERR(block_rsv))
@@ -6627,7 +6757,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
 		else
 			memset(&extent_op->key, 0, sizeof(extent_op->key));
 		extent_op->flags_to_set = flags;
-		extent_op->update_key = 1;
+		if (skinny_metadata)
+			extent_op->update_key = 0;
+		else
+			extent_op->update_key = 1;
 		extent_op->update_flags = 1;
 		extent_op->is_data = 0;
 
@@ -6704,8 +6837,9 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 			continue;
 
 		/* We don't lock the tree block, it's OK to be racy here */
-		ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
-					       &refs, &flags);
+		ret = btrfs_lookup_extent_info(trans, root, bytenr,
+					       wc->level - 1, 1, &refs,
+					       &flags);
 		/* We don't care about errors in readahead. */
 		if (ret < 0)
 			continue;
@@ -6772,7 +6906,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
 		BUG_ON(!path->locks[level]);
 		ret = btrfs_lookup_extent_info(trans, root,
-					       eb->start, eb->len,
+					       eb->start, level, 1,
 					       &wc->refs[level],
 					       &wc->flags[level]);
 		BUG_ON(ret == -ENOMEM);
@@ -6870,7 +7004,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	btrfs_tree_lock(next);
 	btrfs_set_lock_blocking(next);
 
-	ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+	ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1,
 				       &wc->refs[level - 1],
 				       &wc->flags[level - 1]);
 	if (ret < 0) {
@@ -6878,7 +7012,10 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 		return ret;
 	}
 
-	BUG_ON(wc->refs[level - 1] == 0);
+	if (unlikely(wc->refs[level - 1] == 0)) {
+		btrfs_err(root->fs_info, "Missing references.");
+		BUG();
+	}
 	*lookup_info = 0;
 
 	if (wc->stage == DROP_REFERENCE) {
@@ -6917,8 +7054,10 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 		if (reada && level == 1)
 			reada_walk_down(trans, root, wc, path);
 		next = read_tree_block(root, bytenr, blocksize, generation);
-		if (!next)
+		if (!next || !extent_buffer_uptodate(next)) {
+			free_extent_buffer(next);
 			return -EIO;
+		}
 		btrfs_tree_lock(next);
 		btrfs_set_lock_blocking(next);
 	}
@@ -7001,7 +7140,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
 			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 
 			ret = btrfs_lookup_extent_info(trans, root,
-						       eb->start, eb->len,
+						       eb->start, level, 1,
 						       &wc->refs[level],
 						       &wc->flags[level]);
 			if (ret < 0) {
@@ -7137,6 +7276,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
  * reference count by one. if update_ref is true, this function
  * also make sure backrefs for the shared block and all lower level
  * blocks are properly updated.
+ *
+ * If called with for_reloc == 0, may exit early with -EAGAIN
  */
 int btrfs_drop_snapshot(struct btrfs_root *root,
 			 struct btrfs_block_rsv *block_rsv, int update_ref,
@@ -7211,8 +7352,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 
 			ret = btrfs_lookup_extent_info(trans, root,
 						path->nodes[level]->start,
-						path->nodes[level]->len,
-						&wc->refs[level],
+						level, 1, &wc->refs[level],
 						&wc->flags[level]);
 			if (ret < 0) {
 				err = ret;
@@ -7238,6 +7378,12 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
 
 	while (1) {
+		if (!for_reloc && btrfs_fs_closing(root->fs_info)) {
+			pr_debug("btrfs: drop snapshot early exit\n");
+			err = -EAGAIN;
+			goto out_end_trans;
+		}
+
 		ret = walk_down_tree(trans, root, path, wc);
 		if (ret < 0) {
 			err = ret;
@@ -8020,10 +8166,26 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 			free_excluded_extents(root, cache);
 		}
 
+		ret = btrfs_add_block_group_cache(root->fs_info, cache);
+		if (ret) {
+			btrfs_remove_free_space_cache(cache);
+			btrfs_put_block_group(cache);
+			goto error;
+		}
+
 		ret = update_space_info(info, cache->flags, found_key.offset,
 					btrfs_block_group_used(&cache->item),
 					&space_info);
-		BUG_ON(ret); /* -ENOMEM */
+		if (ret) {
+			btrfs_remove_free_space_cache(cache);
+			spin_lock(&info->block_group_cache_lock);
+			rb_erase(&cache->cache_node,
+				 &info->block_group_cache_tree);
+			spin_unlock(&info->block_group_cache_lock);
+			btrfs_put_block_group(cache);
+			goto error;
+		}
+
 		cache->space_info = space_info;
 		spin_lock(&cache->space_info->lock);
 		cache->space_info->bytes_readonly += cache->bytes_super;
@@ -8031,9 +8193,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 
 		__link_block_group(space_info, cache);
 
-		ret = btrfs_add_block_group_cache(root->fs_info, cache);
-		BUG_ON(ret); /* Logic error */
-
 		set_avail_alloc_bits(root->fs_info, cache->flags);
 		if (btrfs_chunk_readonly(root, cache->key.objectid))
 			set_block_group_ro(cache, 1);
@@ -8156,9 +8315,24 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 
 	free_excluded_extents(root, cache);
 
+	ret = btrfs_add_block_group_cache(root->fs_info, cache);
+	if (ret) {
+		btrfs_remove_free_space_cache(cache);
+		btrfs_put_block_group(cache);
+		return ret;
+	}
+
 	ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
 				&cache->space_info);
-	BUG_ON(ret); /* -ENOMEM */
+	if (ret) {
+		btrfs_remove_free_space_cache(cache);
+		spin_lock(&root->fs_info->block_group_cache_lock);
+		rb_erase(&cache->cache_node,
+			 &root->fs_info->block_group_cache_tree);
+		spin_unlock(&root->fs_info->block_group_cache_lock);
+		btrfs_put_block_group(cache);
+		return ret;
+	}
 	update_global_block_rsv(root->fs_info);
 
 	spin_lock(&cache->space_info->lock);
@@ -8167,9 +8341,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 
 	__link_block_group(cache->space_info, cache);
 
-	ret = btrfs_add_block_group_cache(root->fs_info, cache);
-	BUG_ON(ret); /* Logic error */
-
 	list_add_tail(&cache->new_bg_list, &trans->new_bgs);
 
 	set_avail_alloc_bits(extent_root->fs_info, type);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 73f2bfe..32d67a8 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -24,12 +24,62 @@
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
 
+#ifdef CONFIG_BTRFS_DEBUG
 static LIST_HEAD(buffers);
 static LIST_HEAD(states);
 
-#define LEAK_DEBUG 0
-#if LEAK_DEBUG
 static DEFINE_SPINLOCK(leak_lock);
+
+static inline
+void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&leak_lock, flags);
+	list_add(new, head);
+	spin_unlock_irqrestore(&leak_lock, flags);
+}
+
+static inline
+void btrfs_leak_debug_del(struct list_head *entry)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&leak_lock, flags);
+	list_del(entry);
+	spin_unlock_irqrestore(&leak_lock, flags);
+}
+
+static inline
+void btrfs_leak_debug_check(void)
+{
+	struct extent_state *state;
+	struct extent_buffer *eb;
+
+	while (!list_empty(&states)) {
+		state = list_entry(states.next, struct extent_state, leak_list);
+		printk(KERN_ERR "btrfs state leak: start %llu end %llu "
+		       "state %lu in tree %p refs %d\n",
+		       (unsigned long long)state->start,
+		       (unsigned long long)state->end,
+		       state->state, state->tree, atomic_read(&state->refs));
+		list_del(&state->leak_list);
+		kmem_cache_free(extent_state_cache, state);
+	}
+
+	while (!list_empty(&buffers)) {
+		eb = list_entry(buffers.next, struct extent_buffer, leak_list);
+		printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
+		       "refs %d\n", (unsigned long long)eb->start,
+		       eb->len, atomic_read(&eb->refs));
+		list_del(&eb->leak_list);
+		kmem_cache_free(extent_buffer_cache, eb);
+	}
+}
+#else
+#define btrfs_leak_debug_add(new, head)	do {} while (0)
+#define btrfs_leak_debug_del(entry)	do {} while (0)
+#define btrfs_leak_debug_check()	do {} while (0)
 #endif
 
 #define BUFFER_LRU_MAX 64
@@ -84,29 +134,7 @@ free_state_cache:
 
 void extent_io_exit(void)
 {
-	struct extent_state *state;
-	struct extent_buffer *eb;
-
-	while (!list_empty(&states)) {
-		state = list_entry(states.next, struct extent_state, leak_list);
-		printk(KERN_ERR "btrfs state leak: start %llu end %llu "
-		       "state %lu in tree %p refs %d\n",
-		       (unsigned long long)state->start,
-		       (unsigned long long)state->end,
-		       state->state, state->tree, atomic_read(&state->refs));
-		list_del(&state->leak_list);
-		kmem_cache_free(extent_state_cache, state);
-
-	}
-
-	while (!list_empty(&buffers)) {
-		eb = list_entry(buffers.next, struct extent_buffer, leak_list);
-		printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
-		       "refs %d\n", (unsigned long long)eb->start,
-		       eb->len, atomic_read(&eb->refs));
-		list_del(&eb->leak_list);
-		kmem_cache_free(extent_buffer_cache, eb);
-	}
+	btrfs_leak_debug_check();
 
 	/*
 	 * Make sure all delayed rcu free are flushed before we
@@ -134,9 +162,6 @@ void extent_io_tree_init(struct extent_io_tree *tree,
 static struct extent_state *alloc_extent_state(gfp_t mask)
 {
 	struct extent_state *state;
-#if LEAK_DEBUG
-	unsigned long flags;
-#endif
 
 	state = kmem_cache_alloc(extent_state_cache, mask);
 	if (!state)
@@ -144,11 +169,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
 	state->state = 0;
 	state->private = 0;
 	state->tree = NULL;
-#if LEAK_DEBUG
-	spin_lock_irqsave(&leak_lock, flags);
-	list_add(&state->leak_list, &states);
-	spin_unlock_irqrestore(&leak_lock, flags);
-#endif
+	btrfs_leak_debug_add(&state->leak_list, &states);
 	atomic_set(&state->refs, 1);
 	init_waitqueue_head(&state->wq);
 	trace_alloc_extent_state(state, mask, _RET_IP_);
@@ -160,15 +181,8 @@ void free_extent_state(struct extent_state *state)
 	if (!state)
 		return;
 	if (atomic_dec_and_test(&state->refs)) {
-#if LEAK_DEBUG
-		unsigned long flags;
-#endif
 		WARN_ON(state->tree);
-#if LEAK_DEBUG
-		spin_lock_irqsave(&leak_lock, flags);
-		list_del(&state->leak_list);
-		spin_unlock_irqrestore(&leak_lock, flags);
-#endif
+		btrfs_leak_debug_del(&state->leak_list);
 		trace_free_extent_state(state, _RET_IP_);
 		kmem_cache_free(extent_state_cache, state);
 	}
@@ -308,21 +322,21 @@ static void merge_state(struct extent_io_tree *tree,
 }
 
 static void set_state_cb(struct extent_io_tree *tree,
-			 struct extent_state *state, int *bits)
+			 struct extent_state *state, unsigned long *bits)
 {
 	if (tree->ops && tree->ops->set_bit_hook)
 		tree->ops->set_bit_hook(tree->mapping->host, state, bits);
 }
 
 static void clear_state_cb(struct extent_io_tree *tree,
-			   struct extent_state *state, int *bits)
+			   struct extent_state *state, unsigned long *bits)
 {
 	if (tree->ops && tree->ops->clear_bit_hook)
 		tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
 }
 
 static void set_state_bits(struct extent_io_tree *tree,
-			   struct extent_state *state, int *bits);
+			   struct extent_state *state, unsigned long *bits);
 
 /*
  * insert an extent_state struct into the tree.  'bits' are set on the
@@ -336,7 +350,7 @@ static void set_state_bits(struct extent_io_tree *tree,
  */
 static int insert_state(struct extent_io_tree *tree,
 			struct extent_state *state, u64 start, u64 end,
-			int *bits)
+			unsigned long *bits)
 {
 	struct rb_node *node;
 
@@ -424,10 +438,10 @@ static struct extent_state *next_state(struct extent_state *state)
  */
 static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
 					    struct extent_state *state,
-					    int *bits, int wake)
+					    unsigned long *bits, int wake)
 {
 	struct extent_state *next;
-	int bits_to_clear = *bits & ~EXTENT_CTLBITS;
+	unsigned long bits_to_clear = *bits & ~EXTENT_CTLBITS;
 
 	if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
 		u64 range = state->end - state->start + 1;
@@ -463,7 +477,7 @@ alloc_extent_state_atomic(struct extent_state *prealloc)
 	return prealloc;
 }
 
-void extent_io_tree_panic(struct extent_io_tree *tree, int err)
+static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
 {
 	btrfs_panic(tree_fs_info(tree), err, "Locking error: "
 		    "Extent tree was modified by another "
@@ -483,7 +497,7 @@ void extent_io_tree_panic(struct extent_io_tree *tree, int err)
  * This takes the tree lock, and returns 0 on success and < 0 on error.
  */
 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		     int bits, int wake, int delete,
+		     unsigned long bits, int wake, int delete,
 		     struct extent_state **cached_state,
 		     gfp_t mask)
 {
@@ -644,7 +658,8 @@ static void wait_on_state(struct extent_io_tree *tree,
  * The range [start, end] is inclusive.
  * The tree lock is taken by this function
  */
-void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
+static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+			    unsigned long bits)
 {
 	struct extent_state *state;
 	struct rb_node *node;
@@ -685,9 +700,9 @@ out:
 
 static void set_state_bits(struct extent_io_tree *tree,
 			   struct extent_state *state,
-			   int *bits)
+			   unsigned long *bits)
 {
-	int bits_to_set = *bits & ~EXTENT_CTLBITS;
+	unsigned long bits_to_set = *bits & ~EXTENT_CTLBITS;
 
 	set_state_cb(tree, state, bits);
 	if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
@@ -730,8 +745,9 @@ static void uncache_state(struct extent_state **cached_ptr)
 
 static int __must_check
 __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		 int bits, int exclusive_bits, u64 *failed_start,
-		 struct extent_state **cached_state, gfp_t mask)
+		 unsigned long bits, unsigned long exclusive_bits,
+		 u64 *failed_start, struct extent_state **cached_state,
+		 gfp_t mask)
 {
 	struct extent_state *state;
 	struct extent_state *prealloc = NULL;
@@ -923,9 +939,9 @@ search_again:
 	goto again;
 }
 
-int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
-		   u64 *failed_start, struct extent_state **cached_state,
-		   gfp_t mask)
+int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+		   unsigned long bits, u64 * failed_start,
+		   struct extent_state **cached_state, gfp_t mask)
 {
 	return __set_extent_bit(tree, start, end, bits, 0, failed_start,
 				cached_state, mask);
@@ -950,7 +966,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
  * boundary bits like LOCK.
  */
 int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		       int bits, int clear_bits,
+		       unsigned long bits, unsigned long clear_bits,
 		       struct extent_state **cached_state, gfp_t mask)
 {
 	struct extent_state *state;
@@ -1143,14 +1159,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 }
 
 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-		    int bits, gfp_t mask)
+		    unsigned long bits, gfp_t mask)
 {
 	return set_extent_bit(tree, start, end, bits, NULL,
 			      NULL, mask);
 }
 
 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-		      int bits, gfp_t mask)
+		      unsigned long bits, gfp_t mask)
 {
 	return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
 }
@@ -1189,7 +1205,7 @@ int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
 			struct extent_state **cached_state, gfp_t mask)
 {
-	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
+	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
 			      cached_state, mask);
 }
 
@@ -1205,7 +1221,7 @@ int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
  * us if waiting is desired.
  */
 int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-		     int bits, struct extent_state **cached_state)
+		     unsigned long bits, struct extent_state **cached_state)
 {
 	int err;
 	u64 failed_start;
@@ -1313,8 +1329,9 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
  * return it.  tree->lock must be held.  NULL will returned if
  * nothing was found after 'start'
  */
-struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
-						 u64 start, int bits)
+static struct extent_state *
+find_first_extent_bit_state(struct extent_io_tree *tree,
+			    u64 start, unsigned long bits)
 {
 	struct rb_node *node;
 	struct extent_state *state;
@@ -1348,7 +1365,7 @@ out:
  * If nothing was found, 1 is returned. If found something, return 0.
  */
 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
-			  u64 *start_ret, u64 *end_ret, int bits,
+			  u64 *start_ret, u64 *end_ret, unsigned long bits,
 			  struct extent_state **cached_state)
 {
 	struct extent_state *state;
@@ -1638,7 +1655,7 @@ int extent_clear_unlock_delalloc(struct inode *inode,
 	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
 	unsigned long nr_pages = end_index - index + 1;
 	int i;
-	int clear_bits = 0;
+	unsigned long clear_bits = 0;
 
 	if (op & EXTENT_CLEAR_UNLOCK)
 		clear_bits |= EXTENT_LOCKED;
@@ -1777,6 +1794,64 @@ out:
 	return ret;
 }
 
+void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
+			    int count)
+{
+	struct rb_node *node;
+	struct extent_state *state;
+
+	spin_lock(&tree->lock);
+	/*
+	 * this search will find all the extents that end after
+	 * our range starts.
+	 */
+	node = tree_search(tree, start);
+	BUG_ON(!node);
+
+	state = rb_entry(node, struct extent_state, rb_node);
+	BUG_ON(state->start != start);
+
+	while (count) {
+		state->private = *csums++;
+		count--;
+		state = next_state(state);
+	}
+	spin_unlock(&tree->lock);
+}
+
+static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index)
+{
+	struct bio_vec *bvec = bio->bi_io_vec + bio_index;
+
+	return page_offset(bvec->bv_page) + bvec->bv_offset;
+}
+
+void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index,
+			u32 csums[], int count)
+{
+	struct rb_node *node;
+	struct extent_state *state = NULL;
+	u64 start;
+
+	spin_lock(&tree->lock);
+	do {
+		start = __btrfs_get_bio_offset(bio, bio_index);
+		if (state == NULL || state->start != start) {
+			node = tree_search(tree, start);
+			BUG_ON(!node);
+
+			state = rb_entry(node, struct extent_state, rb_node);
+			BUG_ON(state->start != start);
+		}
+		state->private = *csums++;
+		count--;
+		bio_index++;
+
+		state = next_state(state);
+	} while (count);
+	spin_unlock(&tree->lock);
+}
+
 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
 {
 	struct rb_node *node;
@@ -1811,7 +1886,7 @@ out:
  * range is found set.
  */
 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		   int bits, int filled, struct extent_state *cached)
+		   unsigned long bits, int filled, struct extent_state *cached)
 {
 	struct extent_state *state = NULL;
 	struct rb_node *node;
@@ -2595,7 +2670,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
 	return ret;
 }
 
-void attach_extent_buffer_page(struct extent_buffer *eb, struct page *page)
+static void attach_extent_buffer_page(struct extent_buffer *eb,
+				      struct page *page)
 {
 	if (!PagePrivate(page)) {
 		SetPagePrivate(page);
@@ -2625,7 +2701,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 				   struct page *page,
 				   get_extent_t *get_extent,
 				   struct bio **bio, int mirror_num,
-				   unsigned long *bio_flags)
+				   unsigned long *bio_flags, int rw)
 {
 	struct inode *inode = page->mapping->host;
 	u64 start = page_offset(page);
@@ -2771,7 +2847,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 		}
 
 		pnr -= page->index;
-		ret = submit_extent_page(READ, tree, page,
+		ret = submit_extent_page(rw, tree, page,
 					 sector, disk_io_size, pg_offset,
 					 bdev, bio, pnr,
 					 end_bio_extent_readpage, mirror_num,
@@ -2804,7 +2880,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
 	int ret;
 
 	ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
-				      &bio_flags);
+				      &bio_flags, READ);
 	if (bio)
 		ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
 	return ret;
@@ -3103,7 +3179,7 @@ static int eb_wait(void *word)
 	return 0;
 }
 
-static void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
+void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
 {
 	wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait,
 		    TASK_UNINTERRUPTIBLE);
@@ -3228,7 +3304,7 @@ static int write_one_eb(struct extent_buffer *eb,
 	u64 offset = eb->start;
 	unsigned long i, num_pages;
 	unsigned long bio_flags = 0;
-	int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
+	int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META;
 	int ret = 0;
 
 	clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
@@ -3665,14 +3741,14 @@ int extent_readpages(struct extent_io_tree *tree,
 			continue;
 		for (i = 0; i < nr; i++) {
 			__extent_read_full_page(tree, pagepool[i], get_extent,
-					&bio, 0, &bio_flags);
+					&bio, 0, &bio_flags, READ);
 			page_cache_release(pagepool[i]);
 		}
 		nr = 0;
 	}
 	for (i = 0; i < nr; i++) {
 		__extent_read_full_page(tree, pagepool[i], get_extent,
-					&bio, 0, &bio_flags);
+					&bio, 0, &bio_flags, READ);
 		page_cache_release(pagepool[i]);
 	}
 
@@ -3713,9 +3789,9 @@ int extent_invalidatepage(struct extent_io_tree *tree,
  * are locked or under IO and drops the related state bits if it is safe
  * to drop the page.
  */
-int try_release_extent_state(struct extent_map_tree *map,
-			     struct extent_io_tree *tree, struct page *page,
-			     gfp_t mask)
+static int try_release_extent_state(struct extent_map_tree *map,
+				    struct extent_io_tree *tree,
+				    struct page *page, gfp_t mask)
 {
 	u64 start = page_offset(page);
 	u64 end = start + PAGE_CACHE_SIZE - 1;
@@ -4006,12 +4082,7 @@ out:
 
 static void __free_extent_buffer(struct extent_buffer *eb)
 {
-#if LEAK_DEBUG
-	unsigned long flags;
-	spin_lock_irqsave(&leak_lock, flags);
-	list_del(&eb->leak_list);
-	spin_unlock_irqrestore(&leak_lock, flags);
-#endif
+	btrfs_leak_debug_del(&eb->leak_list);
 	kmem_cache_free(extent_buffer_cache, eb);
 }
 
@@ -4021,9 +4092,6 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
 						   gfp_t mask)
 {
 	struct extent_buffer *eb = NULL;
-#if LEAK_DEBUG
-	unsigned long flags;
-#endif
 
 	eb = kmem_cache_zalloc(extent_buffer_cache, mask);
 	if (eb == NULL)
@@ -4043,11 +4111,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
 	init_waitqueue_head(&eb->write_lock_wq);
 	init_waitqueue_head(&eb->read_lock_wq);
 
-#if LEAK_DEBUG
-	spin_lock_irqsave(&leak_lock, flags);
-	list_add(&eb->leak_list, &buffers);
-	spin_unlock_irqrestore(&leak_lock, flags);
-#endif
+	btrfs_leak_debug_add(&eb->leak_list, &buffers);
+
 	spin_lock_init(&eb->refs_lock);
 	atomic_set(&eb->refs, 1);
 	atomic_set(&eb->io_pages, 0);
@@ -4385,7 +4450,7 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
 }
 
 /* Expects to have eb->eb_lock already held */
-static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
+static int release_extent_buffer(struct extent_buffer *eb)
 {
 	WARN_ON(atomic_read(&eb->refs) == 0);
 	if (atomic_dec_and_test(&eb->refs)) {
@@ -4443,7 +4508,7 @@ void free_extent_buffer(struct extent_buffer *eb)
 	 * I know this is terrible, but it's temporary until we stop tracking
 	 * the uptodate bits and such for the extent buffers.
 	 */
-	release_extent_buffer(eb, GFP_ATOMIC);
+	release_extent_buffer(eb);
 }
 
 void free_extent_buffer_stale(struct extent_buffer *eb)
@@ -4457,7 +4522,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb)
 	if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
 	    test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
 		atomic_dec(&eb->refs);
-	release_extent_buffer(eb, GFP_NOFS);
+	release_extent_buffer(eb);
 }
 
 void clear_extent_buffer_dirty(struct extent_buffer *eb)
@@ -4509,17 +4574,6 @@ int set_extent_buffer_dirty(struct extent_buffer *eb)
 	return was_dirty;
 }
 
-static int range_straddles_pages(u64 start, u64 len)
-{
-	if (len < PAGE_CACHE_SIZE)
-		return 1;
-	if (start & (PAGE_CACHE_SIZE - 1))
-		return 1;
-	if ((start + len) & (PAGE_CACHE_SIZE - 1))
-		return 1;
-	return 0;
-}
-
 int clear_extent_buffer_uptodate(struct extent_buffer *eb)
 {
 	unsigned long i;
@@ -4551,37 +4605,6 @@ int set_extent_buffer_uptodate(struct extent_buffer *eb)
 	return 0;
 }
 
-int extent_range_uptodate(struct extent_io_tree *tree,
-			  u64 start, u64 end)
-{
-	struct page *page;
-	int ret;
-	int pg_uptodate = 1;
-	int uptodate;
-	unsigned long index;
-
-	if (range_straddles_pages(start, end - start + 1)) {
-		ret = test_range_bit(tree, start, end,
-				     EXTENT_UPTODATE, 1, NULL);
-		if (ret)
-			return 1;
-	}
-	while (start <= end) {
-		index = start >> PAGE_CACHE_SHIFT;
-		page = find_get_page(tree->mapping, index);
-		if (!page)
-			return 1;
-		uptodate = PageUptodate(page);
-		page_cache_release(page);
-		if (!uptodate) {
-			pg_uptodate = 0;
-			break;
-		}
-		start += PAGE_CACHE_SIZE;
-	}
-	return pg_uptodate;
-}
-
 int extent_buffer_uptodate(struct extent_buffer *eb)
 {
 	return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -4644,7 +4667,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 			ClearPageError(page);
 			err = __extent_read_full_page(tree, page,
 						      get_extent, &bio,
-						      mirror_num, &bio_flags);
+						      mirror_num, &bio_flags,
+						      READ | REQ_META);
 			if (err)
 				ret = err;
 		} else {
@@ -4653,7 +4677,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 	}
 
 	if (bio) {
-		err = submit_one_bio(READ, bio, mirror_num, bio_flags);
+		err = submit_one_bio(READ | REQ_META, bio, mirror_num,
+				     bio_flags);
 		if (err)
 			return err;
 	}
@@ -5017,7 +5042,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 	}
 }
 
-int try_release_extent_buffer(struct page *page, gfp_t mask)
+int try_release_extent_buffer(struct page *page)
 {
 	struct extent_buffer *eb;
 
@@ -5047,9 +5072,6 @@ int try_release_extent_buffer(struct page *page, gfp_t mask)
 	}
 	spin_unlock(&page->mapping->private_lock);
 
-	if ((mask & GFP_NOFS) == GFP_NOFS)
-		mask = GFP_NOFS;
-
 	/*
 	 * If tree ref isn't set then we know the ref on this eb is a real ref,
 	 * so just return, this page will likely be freed soon anyway.
@@ -5059,5 +5081,5 @@ int try_release_extent_buffer(struct page *page, gfp_t mask)
 		return 0;
 	}
 
-	return release_extent_buffer(eb, mask);
+	return release_extent_buffer(eb);
 }
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 258c921..a2c03a1 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -81,9 +81,9 @@ struct extent_io_ops {
 	int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
 				      struct extent_state *state, int uptodate);
 	void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
-			     int *bits);
+			     unsigned long *bits);
 	void (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
-			       int *bits);
+			       unsigned long *bits);
 	void (*merge_extent_hook)(struct inode *inode,
 				  struct extent_state *new,
 				  struct extent_state *other);
@@ -116,7 +116,9 @@ struct extent_state {
 	/* for use by the FS */
 	u64 private;
 
+#ifdef CONFIG_BTRFS_DEBUG
 	struct list_head leak_list;
+#endif
 };
 
 #define INLINE_EXTENT_BUFFER_PAGES 16
@@ -132,7 +134,6 @@ struct extent_buffer {
 	atomic_t refs;
 	atomic_t io_pages;
 	int read_mirror;
-	struct list_head leak_list;
 	struct rcu_head rcu_head;
 	pid_t lock_owner;
 
@@ -159,6 +160,9 @@ struct extent_buffer {
 	wait_queue_head_t read_lock_wq;
 	wait_queue_head_t lock_wq;
 	struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
+#ifdef CONFIG_BTRFS_DEBUG
+	struct list_head leak_list;
+#endif
 };
 
 static inline void extent_set_compress_type(unsigned long *bio_flags,
@@ -185,13 +189,10 @@ void extent_io_tree_init(struct extent_io_tree *tree,
 int try_release_extent_mapping(struct extent_map_tree *map,
 			       struct extent_io_tree *tree, struct page *page,
 			       gfp_t mask);
-int try_release_extent_buffer(struct page *page, gfp_t mask);
-int try_release_extent_state(struct extent_map_tree *map,
-			     struct extent_io_tree *tree, struct page *page,
-			     gfp_t mask);
+int try_release_extent_buffer(struct page *page);
 int lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
 int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-		     int bits, struct extent_state **cached);
+		     unsigned long bits, struct extent_state **cached);
 int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end);
 int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
 			 struct extent_state **cached, gfp_t mask);
@@ -207,16 +208,17 @@ u64 count_range_bits(struct extent_io_tree *tree,
 
 void free_extent_state(struct extent_state *state);
 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		   int bits, int filled, struct extent_state *cached_state);
+		   unsigned long bits, int filled,
+		   struct extent_state *cached_state);
 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-		      int bits, gfp_t mask);
+		      unsigned long bits, gfp_t mask);
 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		     int bits, int wake, int delete, struct extent_state **cached,
-		     gfp_t mask);
+		     unsigned long bits, int wake, int delete,
+		     struct extent_state **cached, gfp_t mask);
 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-		    int bits, gfp_t mask);
+		    unsigned long bits, gfp_t mask);
 int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		   int bits, u64 *failed_start,
+		   unsigned long bits, u64 *failed_start,
 		   struct extent_state **cached_state, gfp_t mask);
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
 			struct extent_state **cached_state, gfp_t mask);
@@ -229,17 +231,15 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 		       gfp_t mask);
 int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		       int bits, int clear_bits,
+		       unsigned long bits, unsigned long clear_bits,
 		       struct extent_state **cached_state, gfp_t mask);
 int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
 			struct extent_state **cached_state, gfp_t mask);
 int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
 		      struct extent_state **cached_state, gfp_t mask);
 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
-			  u64 *start_ret, u64 *end_ret, int bits,
+			  u64 *start_ret, u64 *end_ret, unsigned long bits,
 			  struct extent_state **cached_state);
-struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
-						 u64 start, int bits);
 int extent_invalidatepage(struct extent_io_tree *tree,
 			  struct page *page, unsigned long offset);
 int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
@@ -261,6 +261,10 @@ int extent_readpages(struct extent_io_tree *tree,
 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		__u64 start, __u64 len, get_extent_t *get_extent);
 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private);
+void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
+			    int count);
+void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio,
+			int bvec_index, u32 csums[], int count);
 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
 void set_page_extent_mapped(struct page *page);
 
@@ -278,6 +282,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb);
 int read_extent_buffer_pages(struct extent_io_tree *tree,
 			     struct extent_buffer *eb, u64 start, int wait,
 			     get_extent_t *get_extent, int mirror_num);
+void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
 
 static inline unsigned long num_extent_pages(u64 start, u64 len)
 {
@@ -313,7 +318,6 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 			   unsigned long src_offset, unsigned long len);
 void memset_extent_buffer(struct extent_buffer *eb, char c,
 			  unsigned long start, unsigned long len);
-void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
 void clear_extent_buffer_dirty(struct extent_buffer *eb);
 int set_extent_buffer_dirty(struct extent_buffer *eb);
 int set_extent_buffer_uptodate(struct extent_buffer *eb);
@@ -323,8 +327,6 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
 		      unsigned long min_len, char **map,
 		      unsigned long *map_start,
 		      unsigned long *map_len);
-int extent_range_uptodate(struct extent_io_tree *tree,
-			  u64 start, u64 end);
 int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
 int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
 int extent_clear_unlock_delalloc(struct inode *inode,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 2834ca5..a4a7a1a 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -174,6 +174,14 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
 	    test_bit(EXTENT_FLAG_LOGGING, &next->flags))
 		return 0;
 
+	/*
+	 * We don't want to merge stuff that hasn't been written to the log yet
+	 * since it may not reflect exactly what is on disk, and that would be
+	 * bad.
+	 */
+	if (!list_empty(&prev->list) || !list_empty(&next->list))
+		return 0;
+
 	if (extent_map_end(prev) == next->start &&
 	    prev->flags == next->flags &&
 	    prev->bdev == next->bdev &&
@@ -209,9 +217,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
 			em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
 			em->mod_start = merge->mod_start;
 			em->generation = max(em->generation, merge->generation);
-			list_move(&em->list, &tree->modified_extents);
 
-			list_del_init(&merge->list);
 			rb_erase(&merge->rb_node, &tree->map);
 			free_extent_map(merge);
 		}
@@ -227,7 +233,6 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
 		merge->in_tree = 0;
 		em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
 		em->generation = max(em->generation, merge->generation);
-		list_del_init(&merge->list);
 		free_extent_map(merge);
 	}
 }
@@ -302,7 +307,7 @@ void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
  * reference dropped if the merge attempt was successful.
  */
 int add_extent_mapping(struct extent_map_tree *tree,
-		       struct extent_map *em)
+		       struct extent_map *em, int modified)
 {
 	int ret = 0;
 	struct rb_node *rb;
@@ -324,7 +329,10 @@ int add_extent_mapping(struct extent_map_tree *tree,
 	em->mod_start = em->start;
 	em->mod_len = em->len;
 
-	try_merge_map(tree, em);
+	if (modified)
+		list_move(&em->list, &tree->modified_extents);
+	else
+		try_merge_map(tree, em);
 out:
 	return ret;
 }
@@ -337,8 +345,9 @@ static u64 range_end(u64 start, u64 len)
 	return start + len;
 }
 
-struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree,
-					   u64 start, u64 len, int strict)
+static struct extent_map *
+__lookup_extent_mapping(struct extent_map_tree *tree,
+			u64 start, u64 len, int strict)
 {
 	struct extent_map *em;
 	struct rb_node *rb_node;
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index c6598c8..61adc44 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -26,6 +26,7 @@ struct extent_map {
 	u64 mod_len;
 	u64 orig_start;
 	u64 orig_block_len;
+	u64 ram_bytes;
 	u64 block_start;
 	u64 block_len;
 	u64 generation;
@@ -61,7 +62,7 @@ void extent_map_tree_init(struct extent_map_tree *tree);
 struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
 					 u64 start, u64 len);
 int add_extent_mapping(struct extent_map_tree *tree,
-		       struct extent_map *em);
+		       struct extent_map *em, int modified);
 int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
 
 struct extent_map *alloc_extent_map(void);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index c4628a2..b193bf3 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -83,10 +83,11 @@ out:
 	return ret;
 }
 
-struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
-					  struct btrfs_root *root,
-					  struct btrfs_path *path,
-					  u64 bytenr, int cow)
+static struct btrfs_csum_item *
+btrfs_lookup_csum(struct btrfs_trans_handle *trans,
+		  struct btrfs_root *root,
+		  struct btrfs_path *path,
+		  u64 bytenr, int cow)
 {
 	int ret;
 	struct btrfs_key file_key;
@@ -152,32 +153,12 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
-u64 btrfs_file_extent_length(struct btrfs_path *path)
-{
-	int extent_type;
-	struct btrfs_file_extent_item *fi;
-	u64 len;
-
-	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
-			    struct btrfs_file_extent_item);
-	extent_type = btrfs_file_extent_type(path->nodes[0], fi);
-
-	if (extent_type == BTRFS_FILE_EXTENT_REG ||
-	    extent_type == BTRFS_FILE_EXTENT_PREALLOC)
-		len = btrfs_file_extent_num_bytes(path->nodes[0], fi);
-	else if (extent_type == BTRFS_FILE_EXTENT_INLINE)
-		len = btrfs_file_extent_inline_len(path->nodes[0], fi);
-	else
-		BUG();
-
-	return len;
-}
-
 static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
 				   struct inode *inode, struct bio *bio,
 				   u64 logical_offset, u32 *dst, int dio)
 {
-	u32 sum;
+	u32 sum[16];
+	int len;
 	struct bio_vec *bvec = bio->bi_io_vec;
 	int bio_index = 0;
 	u64 offset = 0;
@@ -186,7 +167,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
 	u64 disk_bytenr;
 	u32 diff;
 	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
-	int ret;
+	int count;
 	struct btrfs_path *path;
 	struct btrfs_csum_item *item = NULL;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
@@ -214,10 +195,12 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
 	if (dio)
 		offset = logical_offset;
 	while (bio_index < bio->bi_vcnt) {
+		len = min_t(int, ARRAY_SIZE(sum), bio->bi_vcnt - bio_index);
 		if (!dio)
 			offset = page_offset(bvec->bv_page) + bvec->bv_offset;
-		ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum);
-		if (ret == 0)
+		count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, sum,
+					       len);
+		if (count)
 			goto found;
 
 		if (!item || disk_bytenr < item_start_offset ||
@@ -230,10 +213,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
 			item = btrfs_lookup_csum(NULL, root->fs_info->csum_root,
 						 path, disk_bytenr, 0);
 			if (IS_ERR(item)) {
-				ret = PTR_ERR(item);
-				if (ret == -ENOENT || ret == -EFBIG)
-					ret = 0;
-				sum = 0;
+				count = 1;
+				sum[0] = 0;
 				if (BTRFS_I(inode)->root->root_key.objectid ==
 				    BTRFS_DATA_RELOC_TREE_OBJECTID) {
 					set_extent_bits(io_tree, offset,
@@ -269,19 +250,29 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
 		diff = disk_bytenr - item_start_offset;
 		diff = diff / root->sectorsize;
 		diff = diff * csum_size;
-
-		read_extent_buffer(path->nodes[0], &sum,
+		count = min_t(int, len, (item_last_offset - disk_bytenr) >>
+					inode->i_sb->s_blocksize_bits);
+		read_extent_buffer(path->nodes[0], sum,
 				   ((unsigned long)item) + diff,
-				   csum_size);
+				   csum_size * count);
 found:
-		if (dst)
-			*dst++ = sum;
-		else
-			set_state_private(io_tree, offset, sum);
-		disk_bytenr += bvec->bv_len;
-		offset += bvec->bv_len;
-		bio_index++;
-		bvec++;
+		if (dst) {
+			memcpy(dst, sum, count * csum_size);
+			dst += count;
+		} else {
+			if (dio)
+				extent_cache_csums_dio(io_tree, offset, sum,
+						       count);
+			else
+				extent_cache_csums(io_tree, bio, bio_index, sum,
+					    count);
+		}
+		while (count--) {
+			disk_bytenr += bvec->bv_len;
+			offset += bvec->bv_len;
+			bio_index++;
+			bvec++;
+		}
 	}
 	btrfs_free_path(path);
 	return 0;
@@ -358,11 +349,8 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 
 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
-		    key.type != BTRFS_EXTENT_CSUM_KEY)
-			break;
-
-		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-		if (key.offset > end)
+		    key.type != BTRFS_EXTENT_CSUM_KEY ||
+		    key.offset > end)
 			break;
 
 		if (key.offset > start)
@@ -484,8 +472,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
 
 		data = kmap_atomic(bvec->bv_page);
 		sector_sum->sum = ~(u32)0;
-		sector_sum->sum = btrfs_csum_data(root,
-						  data + bvec->bv_offset,
+		sector_sum->sum = btrfs_csum_data(data + bvec->bv_offset,
 						  sector_sum->sum,
 						  bvec->bv_len);
 		kunmap_atomic(data);
@@ -518,8 +505,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
  * This calls btrfs_truncate_item with the correct args based on the
  * overlap, and fixes up the key as required.
  */
-static noinline void truncate_one_csum(struct btrfs_trans_handle *trans,
-				       struct btrfs_root *root,
+static noinline void truncate_one_csum(struct btrfs_root *root,
 				       struct btrfs_path *path,
 				       struct btrfs_key *key,
 				       u64 bytenr, u64 len)
@@ -544,7 +530,7 @@ static noinline void truncate_one_csum(struct btrfs_trans_handle *trans,
 		 */
 		u32 new_size = (bytenr - key->offset) >> blocksize_bits;
 		new_size *= csum_size;
-		btrfs_truncate_item(trans, root, path, new_size, 1);
+		btrfs_truncate_item(root, path, new_size, 1);
 	} else if (key->offset >= bytenr && csum_end > end_byte &&
 		   end_byte > key->offset) {
 		/*
@@ -556,10 +542,10 @@ static noinline void truncate_one_csum(struct btrfs_trans_handle *trans,
 		u32 new_size = (csum_end - end_byte) >> blocksize_bits;
 		new_size *= csum_size;
 
-		btrfs_truncate_item(trans, root, path, new_size, 0);
+		btrfs_truncate_item(root, path, new_size, 0);
 
 		key->offset = end_byte;
-		btrfs_set_item_key_safe(trans, root, path, key);
+		btrfs_set_item_key_safe(root, path, key);
 	} else {
 		BUG();
 	}
@@ -674,7 +660,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 
 			key.offset = end_byte - 1;
 		} else {
-			truncate_one_csum(trans, root, path, &key, bytenr, len);
+			truncate_one_csum(root, path, &key, bytenr, len);
 			if (key.offset < bytenr)
 				break;
 		}
@@ -835,7 +821,7 @@ again:
 		diff /= csum_size;
 		diff *= csum_size;
 
-		btrfs_extend_item(trans, root, path, diff);
+		btrfs_extend_item(root, path, diff);
 		goto csum;
 	}
 
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index bc4d54c..4205ba7 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -193,8 +193,8 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
  * the same inode in the tree, we will merge them together (by
  * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
  */
-void btrfs_requeue_inode_defrag(struct inode *inode,
-				struct inode_defrag *defrag)
+static void btrfs_requeue_inode_defrag(struct inode *inode,
+				       struct inode_defrag *defrag)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	int ret;
@@ -474,7 +474,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
 /*
  * unlocks pages after btrfs_file_write is done with them
  */
-void btrfs_drop_pages(struct page **pages, size_t num_pages)
+static void btrfs_drop_pages(struct page **pages, size_t num_pages)
 {
 	size_t i;
 	for (i = 0; i < num_pages; i++) {
@@ -498,9 +498,9 @@ void btrfs_drop_pages(struct page **pages, size_t num_pages)
  * doing real data extents, marking pages dirty and delalloc as required.
  */
 int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
-		      struct page **pages, size_t num_pages,
-		      loff_t pos, size_t write_bytes,
-		      struct extent_state **cached)
+			     struct page **pages, size_t num_pages,
+			     loff_t pos, size_t write_bytes,
+			     struct extent_state **cached)
 {
 	int err = 0;
 	int i;
@@ -553,6 +553,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 	int testend = 1;
 	unsigned long flags;
 	int compressed = 0;
+	bool modified;
 
 	WARN_ON(end < start);
 	if (end == (u64)-1) {
@@ -562,6 +563,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 	while (1) {
 		int no_splits = 0;
 
+		modified = false;
 		if (!split)
 			split = alloc_extent_map();
 		if (!split2)
@@ -593,6 +595,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
 		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
 		clear_bit(EXTENT_FLAG_LOGGING, &flags);
+		modified = !list_empty(&em->list);
 		remove_extent_mapping(em_tree, em);
 		if (no_splits)
 			goto next;
@@ -608,15 +611,15 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 				split->block_len = em->block_len;
 			else
 				split->block_len = split->len;
+			split->ram_bytes = em->ram_bytes;
 			split->orig_block_len = max(split->block_len,
 						    em->orig_block_len);
 			split->generation = gen;
 			split->bdev = em->bdev;
 			split->flags = flags;
 			split->compress_type = em->compress_type;
-			ret = add_extent_mapping(em_tree, split);
+			ret = add_extent_mapping(em_tree, split, modified);
 			BUG_ON(ret); /* Logic error */
-			list_move(&split->list, &em_tree->modified_extents);
 			free_extent_map(split);
 			split = split2;
 			split2 = NULL;
@@ -633,6 +636,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 			split->generation = gen;
 			split->orig_block_len = max(em->block_len,
 						    em->orig_block_len);
+			split->ram_bytes = em->ram_bytes;
 
 			if (compressed) {
 				split->block_len = em->block_len;
@@ -644,9 +648,8 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 				split->orig_start = em->orig_start;
 			}
 
-			ret = add_extent_mapping(em_tree, split);
+			ret = add_extent_mapping(em_tree, split, modified);
 			BUG_ON(ret); /* Logic error */
-			list_move(&split->list, &em_tree->modified_extents);
 			free_extent_map(split);
 			split = NULL;
 		}
@@ -822,7 +825,7 @@ next_slot:
 
 			memcpy(&new_key, &key, sizeof(new_key));
 			new_key.offset = end;
-			btrfs_set_item_key_safe(trans, root, path, &new_key);
+			btrfs_set_item_key_safe(root, path, &new_key);
 
 			extent_offset += end - key.offset;
 			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
@@ -1038,7 +1041,7 @@ again:
 				     ino, bytenr, orig_offset,
 				     &other_start, &other_end)) {
 			new_key.offset = end;
-			btrfs_set_item_key_safe(trans, root, path, &new_key);
+			btrfs_set_item_key_safe(root, path, &new_key);
 			fi = btrfs_item_ptr(leaf, path->slots[0],
 					    struct btrfs_file_extent_item);
 			btrfs_set_file_extent_generation(leaf, fi,
@@ -1072,7 +1075,7 @@ again:
 							 trans->transid);
 			path->slots[0]++;
 			new_key.offset = start;
-			btrfs_set_item_key_safe(trans, root, path, &new_key);
+			btrfs_set_item_key_safe(root, path, &new_key);
 
 			fi = btrfs_item_ptr(leaf, path->slots[0],
 					    struct btrfs_file_extent_item);
@@ -1883,7 +1886,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
 
 		path->slots[0]++;
 		key.offset = offset;
-		btrfs_set_item_key_safe(trans, root, path, &key);
+		btrfs_set_item_key_safe(root, path, &key);
 		fi = btrfs_item_ptr(leaf, path->slots[0],
 				    struct btrfs_file_extent_item);
 		num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
@@ -1913,6 +1916,7 @@ out:
 	} else {
 		hole_em->start = offset;
 		hole_em->len = end - offset;
+		hole_em->ram_bytes = hole_em->len;
 		hole_em->orig_start = offset;
 
 		hole_em->block_start = EXTENT_MAP_HOLE;
@@ -1925,10 +1929,7 @@ out:
 		do {
 			btrfs_drop_extent_cache(inode, offset, end - 1, 0);
 			write_lock(&em_tree->lock);
-			ret = add_extent_mapping(em_tree, hole_em);
-			if (!ret)
-				list_move(&hole_em->list,
-					  &em_tree->modified_extents);
+			ret = add_extent_mapping(em_tree, hole_em, 1);
 			write_unlock(&em_tree->lock);
 		} while (ret == -EEXIST);
 		free_extent_map(hole_em);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 1f84fc0..ecca6c7 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -104,7 +104,8 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
 
 	spin_lock(&block_group->lock);
 	if (!((BTRFS_I(inode)->flags & flags) == flags)) {
-		printk(KERN_INFO "Old style space inode found, converting.\n");
+		btrfs_info(root->fs_info,
+			"Old style space inode found, converting.");
 		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
 			BTRFS_INODE_NODATACOW;
 		block_group->disk_cache_state = BTRFS_DC_CLEAR;
@@ -119,9 +120,10 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
 	return inode;
 }
 
-int __create_free_space_inode(struct btrfs_root *root,
-			      struct btrfs_trans_handle *trans,
-			      struct btrfs_path *path, u64 ino, u64 offset)
+static int __create_free_space_inode(struct btrfs_root *root,
+				     struct btrfs_trans_handle *trans,
+				     struct btrfs_path *path,
+				     u64 ino, u64 offset)
 {
 	struct btrfs_key key;
 	struct btrfs_disk_key disk_key;
@@ -431,7 +433,7 @@ static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
 	if (index == 0)
 		offset = sizeof(u32) * io_ctl->num_pages;
 
-	crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
+	crc = btrfs_csum_data(io_ctl->orig + offset, crc,
 			      PAGE_CACHE_SIZE - offset);
 	btrfs_csum_final(crc, (char *)&crc);
 	io_ctl_unmap_page(io_ctl);
@@ -461,7 +463,7 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
 	kunmap(io_ctl->pages[0]);
 
 	io_ctl_map_page(io_ctl, 0);
-	crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
+	crc = btrfs_csum_data(io_ctl->orig + offset, crc,
 			      PAGE_CACHE_SIZE - offset);
 	btrfs_csum_final(crc, (char *)&crc);
 	if (val != crc) {
@@ -624,9 +626,9 @@ next:
 	spin_unlock(&ctl->tree_lock);
 }
 
-int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
-			    struct btrfs_free_space_ctl *ctl,
-			    struct btrfs_path *path, u64 offset)
+static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
+				   struct btrfs_free_space_ctl *ctl,
+				   struct btrfs_path *path, u64 offset)
 {
 	struct btrfs_free_space_header *header;
 	struct extent_buffer *leaf;
@@ -669,10 +671,11 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
 	btrfs_release_path(path);
 
 	if (BTRFS_I(inode)->generation != generation) {
-		printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
-		       " not match free space cache generation (%llu)\n",
-		       (unsigned long long)BTRFS_I(inode)->generation,
-		       (unsigned long long)generation);
+		btrfs_err(root->fs_info,
+			"free space inode generation (%llu) "
+			"did not match free space cache generation (%llu)",
+			(unsigned long long)BTRFS_I(inode)->generation,
+			(unsigned long long)generation);
 		return 0;
 	}
 
@@ -721,8 +724,8 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
 			ret = link_free_space(ctl, e);
 			spin_unlock(&ctl->tree_lock);
 			if (ret) {
-				printk(KERN_ERR "Duplicate entries in "
-				       "free space cache, dumping\n");
+				btrfs_err(root->fs_info,
+					"Duplicate entries in free space cache, dumping");
 				kmem_cache_free(btrfs_free_space_cachep, e);
 				goto free_cache;
 			}
@@ -741,8 +744,8 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
 			ctl->op->recalc_thresholds(ctl);
 			spin_unlock(&ctl->tree_lock);
 			if (ret) {
-				printk(KERN_ERR "Duplicate entries in "
-				       "free space cache, dumping\n");
+				btrfs_err(root->fs_info,
+					"Duplicate entries in free space cache, dumping");
 				kmem_cache_free(btrfs_free_space_cachep, e);
 				goto free_cache;
 			}
@@ -833,8 +836,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
 
 	if (!matched) {
 		__btrfs_remove_free_space_cache(ctl);
-		printk(KERN_ERR "block group %llu has an wrong amount of free "
-		       "space\n", block_group->key.objectid);
+		btrfs_err(fs_info, "block group %llu has wrong amount of free space",
+			block_group->key.objectid);
 		ret = -1;
 	}
 out:
@@ -845,8 +848,8 @@ out:
 		spin_unlock(&block_group->lock);
 		ret = 0;
 
-		printk(KERN_ERR "btrfs: failed to load free space cache "
-		       "for block group %llu\n", block_group->key.objectid);
+		btrfs_err(fs_info, "failed to load free space cache for block group %llu",
+			block_group->key.objectid);
 	}
 
 	iput(inode);
@@ -866,11 +869,11 @@ out:
  * on mount.  This will return 0 if it was successfull in writing the cache out,
  * and -1 if it was not.
  */
-int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
-			    struct btrfs_free_space_ctl *ctl,
-			    struct btrfs_block_group_cache *block_group,
-			    struct btrfs_trans_handle *trans,
-			    struct btrfs_path *path, u64 offset)
+static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
+				   struct btrfs_free_space_ctl *ctl,
+				   struct btrfs_block_group_cache *block_group,
+				   struct btrfs_trans_handle *trans,
+				   struct btrfs_path *path, u64 offset)
 {
 	struct btrfs_free_space_header *header;
 	struct extent_buffer *leaf;
@@ -1104,8 +1107,9 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		spin_unlock(&block_group->lock);
 		ret = 0;
 #ifdef DEBUG
-		printk(KERN_ERR "btrfs: failed to write free space cache "
-		       "for block group %llu\n", block_group->key.objectid);
+		btrfs_err(root->fs_info,
+			"failed to write free space cache for block group %llu",
+			block_group->key.objectid);
 #endif
 	}
 
@@ -1564,7 +1568,8 @@ again:
 	search_bytes = ctl->unit;
 	search_bytes = min(search_bytes, end - search_start + 1);
 	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
-	BUG_ON(ret < 0 || search_start != *offset);
+	if (ret < 0 || search_start != *offset)
+		return -EINVAL;
 
 	/* We may have found more bits than what we need */
 	search_bytes = min(search_bytes, *bytes);
@@ -1970,7 +1975,6 @@ again:
 		re_search = true;
 		goto again;
 	}
-	BUG_ON(ret); /* logic error */
 out_lock:
 	spin_unlock(&ctl->tree_lock);
 out:
@@ -2064,7 +2068,8 @@ out:
 	return 0;
 }
 
-void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
+static void __btrfs_remove_free_space_cache_locked(
+				struct btrfs_free_space_ctl *ctl)
 {
 	struct btrfs_free_space *info;
 	struct rb_node *node;
@@ -2931,8 +2936,9 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
 	ret = __load_free_space_cache(root, inode, ctl, path, 0);
 
 	if (ret < 0)
-		printk(KERN_ERR "btrfs: failed to load free ino cache for "
-		       "root %llu\n", root->root_key.objectid);
+		btrfs_err(fs_info,
+			"failed to load free ino cache for root %llu",
+			root->root_key.objectid);
 out_put:
 	iput(inode);
 out:
@@ -2959,11 +2965,531 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
 	if (ret) {
 		btrfs_delalloc_release_metadata(inode, inode->i_size);
 #ifdef DEBUG
-		printk(KERN_ERR "btrfs: failed to write free ino cache "
-		       "for root %llu\n", root->root_key.objectid);
+		btrfs_err(root->fs_info,
+			"failed to write free ino cache for root %llu",
+			root->root_key.objectid);
 #endif
 	}
 
 	iput(inode);
 	return ret;
 }
+
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+static struct btrfs_block_group_cache *init_test_block_group(void)
+{
+	struct btrfs_block_group_cache *cache;
+
+	cache = kzalloc(sizeof(*cache), GFP_NOFS);
+	if (!cache)
+		return NULL;
+	cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
+					GFP_NOFS);
+	if (!cache->free_space_ctl) {
+		kfree(cache);
+		return NULL;
+	}
+
+	cache->key.objectid = 0;
+	cache->key.offset = 1024 * 1024 * 1024;
+	cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+	cache->sectorsize = 4096;
+
+	spin_lock_init(&cache->lock);
+	INIT_LIST_HEAD(&cache->list);
+	INIT_LIST_HEAD(&cache->cluster_list);
+	INIT_LIST_HEAD(&cache->new_bg_list);
+
+	btrfs_init_free_space_ctl(cache);
+
+	return cache;
+}
+
+/*
+ * Checks to see if the given range is in the free space cache.  This is really
+ * just used to check the absence of space, so if there is free space in the
+ * range at all we will return 1.
+ */
+static int check_exists(struct btrfs_block_group_cache *cache, u64 offset,
+			u64 bytes)
+{
+	struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
+	struct btrfs_free_space *info;
+	int ret = 0;
+
+	spin_lock(&ctl->tree_lock);
+	info = tree_search_offset(ctl, offset, 0, 0);
+	if (!info) {
+		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
+					  1, 0);
+		if (!info)
+			goto out;
+	}
+
+have_info:
+	if (info->bitmap) {
+		u64 bit_off, bit_bytes;
+		struct rb_node *n;
+		struct btrfs_free_space *tmp;
+
+		bit_off = offset;
+		bit_bytes = ctl->unit;
+		ret = search_bitmap(ctl, info, &bit_off, &bit_bytes);
+		if (!ret) {
+			if (bit_off == offset) {
+				ret = 1;
+				goto out;
+			} else if (bit_off > offset &&
+				   offset + bytes > bit_off) {
+				ret = 1;
+				goto out;
+			}
+		}
+
+		n = rb_prev(&info->offset_index);
+		while (n) {
+			tmp = rb_entry(n, struct btrfs_free_space,
+				       offset_index);
+			if (tmp->offset + tmp->bytes < offset)
+				break;
+			if (offset + bytes < tmp->offset) {
+				n = rb_prev(&info->offset_index);
+				continue;
+			}
+			info = tmp;
+			goto have_info;
+		}
+
+		n = rb_next(&info->offset_index);
+		while (n) {
+			tmp = rb_entry(n, struct btrfs_free_space,
+				       offset_index);
+			if (offset + bytes < tmp->offset)
+				break;
+			if (tmp->offset + tmp->bytes < offset) {
+				n = rb_next(&info->offset_index);
+				continue;
+			}
+			info = tmp;
+			goto have_info;
+		}
+
+		goto out;
+	}
+
+	if (info->offset == offset) {
+		ret = 1;
+		goto out;
+	}
+
+	if (offset > info->offset && offset < info->offset + info->bytes)
+		ret = 1;
+out:
+	spin_unlock(&ctl->tree_lock);
+	return ret;
+}
+
+/*
+ * Use this if you need to make a bitmap or extent entry specifically, it
+ * doesn't do any of the merging that add_free_space does, this acts a lot like
+ * how the free space cache loading stuff works, so you can get really weird
+ * configurations.
+ */
+static int add_free_space_entry(struct btrfs_block_group_cache *cache,
+				u64 offset, u64 bytes, bool bitmap)
+{
+	struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
+	struct btrfs_free_space *info = NULL, *bitmap_info;
+	void *map = NULL;
+	u64 bytes_added;
+	int ret;
+
+again:
+	if (!info) {
+		info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
+		if (!info)
+			return -ENOMEM;
+	}
+
+	if (!bitmap) {
+		spin_lock(&ctl->tree_lock);
+		info->offset = offset;
+		info->bytes = bytes;
+		ret = link_free_space(ctl, info);
+		spin_unlock(&ctl->tree_lock);
+		if (ret)
+			kmem_cache_free(btrfs_free_space_cachep, info);
+		return ret;
+	}
+
+	if (!map) {
+		map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+		if (!map) {
+			kmem_cache_free(btrfs_free_space_cachep, info);
+			return -ENOMEM;
+		}
+	}
+
+	spin_lock(&ctl->tree_lock);
+	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
+					 1, 0);
+	if (!bitmap_info) {
+		info->bitmap = map;
+		map = NULL;
+		add_new_bitmap(ctl, info, offset);
+		bitmap_info = info;
+	}
+
+	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
+	bytes -= bytes_added;
+	offset += bytes_added;
+	spin_unlock(&ctl->tree_lock);
+
+	if (bytes)
+		goto again;
+
+	if (map)
+		kfree(map);
+	return 0;
+}
+
+/*
+ * This test just does basic sanity checking, making sure we can add an exten
+ * entry and remove space from either end and the middle, and make sure we can
+ * remove space that covers adjacent extent entries.
+ */
+static int test_extents(struct btrfs_block_group_cache *cache)
+{
+	int ret = 0;
+
+	printk(KERN_ERR "Running extent only tests\n");
+
+	/* First just make sure we can remove an entire entry */
+	ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
+	if (ret) {
+		printk(KERN_ERR "Error adding initial extents %d\n", ret);
+		return ret;
+	}
+
+	ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
+	if (ret) {
+		printk(KERN_ERR "Error removing extent %d\n", ret);
+		return ret;
+	}
+
+	if (check_exists(cache, 0, 4 * 1024 * 1024)) {
+		printk(KERN_ERR "Full remove left some lingering space\n");
+		return -1;
+	}
+
+	/* Ok edge and middle cases now */
+	ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
+	if (ret) {
+		printk(KERN_ERR "Error adding half extent %d\n", ret);
+		return ret;
+	}
+
+	ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024);
+	if (ret) {
+		printk(KERN_ERR "Error removing tail end %d\n", ret);
+		return ret;
+	}
+
+	ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
+	if (ret) {
+		printk(KERN_ERR "Error removing front end %d\n", ret);
+		return ret;
+	}
+
+	ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096);
+	if (ret) {
+		printk(KERN_ERR "Error removing middle peice %d\n", ret);
+		return ret;
+	}
+
+	if (check_exists(cache, 0, 1 * 1024 * 1024)) {
+		printk(KERN_ERR "Still have space at the front\n");
+		return -1;
+	}
+
+	if (check_exists(cache, 2 * 1024 * 1024, 4096)) {
+		printk(KERN_ERR "Still have space in the middle\n");
+		return -1;
+	}
+
+	if (check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) {
+		printk(KERN_ERR "Still have space at the end\n");
+		return -1;
+	}
+
+	/* Cleanup */
+	__btrfs_remove_free_space_cache(cache->free_space_ctl);
+
+	return 0;
+}
+
+static int test_bitmaps(struct btrfs_block_group_cache *cache)
+{
+	u64 next_bitmap_offset;
+	int ret;
+
+	printk(KERN_ERR "Running bitmap only tests\n");
+
+	ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
+	if (ret) {
+		printk(KERN_ERR "Couldn't create a bitmap entry %d\n", ret);
+		return ret;
+	}
+
+	ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
+	if (ret) {
+		printk(KERN_ERR "Error removing bitmap full range %d\n", ret);
+		return ret;
+	}
+
+	if (check_exists(cache, 0, 4 * 1024 * 1024)) {
+		printk(KERN_ERR "Left some space in bitmap\n");
+		return -1;
+	}
+
+	ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
+	if (ret) {
+		printk(KERN_ERR "Couldn't add to our bitmap entry %d\n", ret);
+		return ret;
+	}
+
+	ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024);
+	if (ret) {
+		printk(KERN_ERR "Couldn't remove middle chunk %d\n", ret);
+		return ret;
+	}
+
+	/*
+	 * The first bitmap we have starts at offset 0 so the next one is just
+	 * at the end of the first bitmap.
+	 */
+	next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
+
+	/* Test a bit straddling two bitmaps */
+	ret = add_free_space_entry(cache, next_bitmap_offset -
+				   (2 * 1024 * 1024), 4 * 1024 * 1024, 1);
+	if (ret) {
+		printk(KERN_ERR "Couldn't add space that straddles two bitmaps"
+		       " %d\n", ret);
+		return ret;
+	}
+
+	ret = btrfs_remove_free_space(cache, next_bitmap_offset -
+				      (1 * 1024 * 1024), 2 * 1024 * 1024);
+	if (ret) {
+		printk(KERN_ERR "Couldn't remove overlapping space %d\n", ret);
+		return ret;
+	}
+
+	if (check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024),
+			 2 * 1024 * 1024)) {
+		printk(KERN_ERR "Left some space when removing overlapping\n");
+		return -1;
+	}
+
+	__btrfs_remove_free_space_cache(cache->free_space_ctl);
+
+	return 0;
+}
+
+/* This is the high grade jackassery */
+static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
+{
+	u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
+	int ret;
+
+	printk(KERN_ERR "Running bitmap and extent tests\n");
+
+	/*
+	 * First let's do something simple, an extent at the same offset as the
+	 * bitmap, but the free space completely in the extent and then
+	 * completely in the bitmap.
+	 */
+	ret = add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1);
+	if (ret) {
+		printk(KERN_ERR "Couldn't create bitmap entry %d\n", ret);
+		return ret;
+	}
+
+	ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
+	if (ret) {
+		printk(KERN_ERR "Couldn't add extent entry %d\n", ret);
+		return ret;
+	}
+
+	ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
+	if (ret) {
+		printk(KERN_ERR "Couldn't remove extent entry %d\n", ret);
+		return ret;
+	}
+
+	if (check_exists(cache, 0, 1 * 1024 * 1024)) {
+		printk(KERN_ERR "Left remnants after our remove\n");
+		return -1;
+	}
+
+	/* Now to add back the extent entry and remove from the bitmap */
+	ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
+	if (ret) {
+		printk(KERN_ERR "Couldn't re-add extent entry %d\n", ret);
+		return ret;
+	}
+
+	ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024);
+	if (ret) {
+		printk(KERN_ERR "Couldn't remove from bitmap %d\n", ret);
+		return ret;
+	}
+
+	if (check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) {
+		printk(KERN_ERR "Left remnants in the bitmap\n");
+		return -1;
+	}
+
+	/*
+	 * Ok so a little more evil, extent entry and bitmap at the same offset,
+	 * removing an overlapping chunk.
+	 */
+	ret = add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1);
+	if (ret) {
+		printk(KERN_ERR "Couldn't add to a bitmap %d\n", ret);
+		return ret;
+	}
+
+	ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024);
+	if (ret) {
+		printk(KERN_ERR "Couldn't remove overlapping space %d\n", ret);
+		return ret;
+	}
+
+	if (check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) {
+		printk(KERN_ERR "Left over peices after removing "
+		       "overlapping\n");
+		return -1;
+	}
+
+	__btrfs_remove_free_space_cache(cache->free_space_ctl);
+
+	/* Now with the extent entry offset into the bitmap */
+	ret = add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1);
+	if (ret) {
+		printk(KERN_ERR "Couldn't add space to the bitmap %d\n", ret);
+		return ret;
+	}
+
+	ret = add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0);
+	if (ret) {
+		printk(KERN_ERR "Couldn't add extent to the cache %d\n", ret);
+		return ret;
+	}
+
+	ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024);
+	if (ret) {
+		printk(KERN_ERR "Problem removing overlapping space %d\n", ret);
+		return ret;
+	}
+
+	if (check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) {
+		printk(KERN_ERR "Left something behind when removing space");
+		return -1;
+	}
+
+	/*
+	 * This has blown up in the past, the extent entry starts before the
+	 * bitmap entry, but we're trying to remove an offset that falls
+	 * completely within the bitmap range and is in both the extent entry
+	 * and the bitmap entry, looks like this
+	 *
+	 *   [ extent ]
+	 *      [ bitmap ]
+	 *        [ del ]
+	 */
+	__btrfs_remove_free_space_cache(cache->free_space_ctl);
+	ret = add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024,
+				   4 * 1024 * 1024, 1);
+	if (ret) {
+		printk(KERN_ERR "Couldn't add bitmap %d\n", ret);
+		return ret;
+	}
+
+	ret = add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024,
+				   5 * 1024 * 1024, 0);
+	if (ret) {
+		printk(KERN_ERR "Couldn't add extent entry %d\n", ret);
+		return ret;
+	}
+
+	ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024,
+				      5 * 1024 * 1024);
+	if (ret) {
+		printk(KERN_ERR "Failed to free our space %d\n", ret);
+		return ret;
+	}
+
+	if (check_exists(cache, bitmap_offset + 1 * 1024 * 1024,
+			 5 * 1024 * 1024)) {
+		printk(KERN_ERR "Left stuff over\n");
+		return -1;
+	}
+
+	__btrfs_remove_free_space_cache(cache->free_space_ctl);
+
+	/*
+	 * This blew up before, we have part of the free space in a bitmap and
+	 * then the entirety of the rest of the space in an extent.  This used
+	 * to return -EAGAIN back from btrfs_remove_extent, make sure this
+	 * doesn't happen.
+	 */
+	ret = add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1);
+	if (ret) {
+		printk(KERN_ERR "Couldn't add bitmap entry %d\n", ret);
+		return ret;
+	}
+
+	ret = add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0);
+	if (ret) {
+		printk(KERN_ERR "Couldn't add extent entry %d\n", ret);
+		return ret;
+	}
+
+	ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024);
+	if (ret) {
+		printk(KERN_ERR "Error removing bitmap and extent "
+		       "overlapping %d\n", ret);
+		return ret;
+	}
+
+	__btrfs_remove_free_space_cache(cache->free_space_ctl);
+	return 0;
+}
+
+void btrfs_test_free_space_cache(void)
+{
+	struct btrfs_block_group_cache *cache;
+
+	printk(KERN_ERR "Running btrfs free space cache tests\n");
+
+	cache = init_test_block_group();
+	if (!cache) {
+		printk(KERN_ERR "Couldn't run the tests\n");
+		return;
+	}
+
+	if (test_extents(cache))
+		goto out;
+	if (test_bitmaps(cache))
+		goto out;
+	if (test_bitmaps_and_extents(cache))
+		goto out;
+out:
+	__btrfs_remove_free_space_cache(cache->free_space_ctl);
+	kfree(cache->free_space_ctl);
+	kfree(cache);
+	printk(KERN_ERR "Free space cache tests finished\n");
+}
+#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 8f2613f..4dc17d8 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -110,4 +110,9 @@ int btrfs_return_cluster_to_free_space(
 			       struct btrfs_free_cluster *cluster);
 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
 			   u64 *trimmed, u64 start, u64 end, u64 minlen);
+
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+void btrfs_test_free_space_cache(void);
+#endif
+
 #endif
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 48b8fda..e0b7034 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -183,10 +183,11 @@ int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans,
 	return -ENOENT;
 }
 
-int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
-			   struct btrfs_root *root,
-			   const char *name, int name_len,
-			   u64 inode_objectid, u64 ref_objectid, u64 *index)
+static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root,
+				  const char *name, int name_len,
+				  u64 inode_objectid, u64 ref_objectid,
+				  u64 *index)
 {
 	struct btrfs_path *path;
 	struct btrfs_key key;
@@ -246,7 +247,7 @@ int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
 	memmove_extent_buffer(leaf, ptr, ptr + del_len,
 			      item_size - (ptr + del_len - item_start));
 
-	btrfs_truncate_item(trans, root, path, item_size - del_len, 1);
+	btrfs_truncate_item(root, path, item_size - del_len, 1);
 
 out:
 	btrfs_free_path(path);
@@ -309,7 +310,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
 	item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
 	memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
 			      item_size - (ptr + sub_item_len - item_start));
-	btrfs_truncate_item(trans, root, path, item_size - sub_item_len, 1);
+	btrfs_truncate_item(root, path, item_size - sub_item_len, 1);
 out:
 	btrfs_free_path(path);
 
@@ -361,7 +362,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
 						   name, name_len, NULL))
 			goto out;
 
-		btrfs_extend_item(trans, root, path, ins_len);
+		btrfs_extend_item(root, path, ins_len);
 		ret = 0;
 	}
 	if (ret < 0)
@@ -417,7 +418,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
 			goto out;
 
 		old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
-		btrfs_extend_item(trans, root, path, ins_len);
+		btrfs_extend_item(root, path, ins_len);
 		ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
 				     struct btrfs_inode_ref);
 		ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 898da0a..9b31b3b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -101,7 +101,10 @@ static noinline int cow_file_range(struct inode *inode,
 static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
 					   u64 len, u64 orig_start,
 					   u64 block_start, u64 block_len,
-					   u64 orig_block_len, int type);
+					   u64 orig_block_len, u64 ram_bytes,
+					   int type);
+
+static int btrfs_dirty_inode(struct inode *inode);
 
 static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
 				     struct inode *inode,  struct inode *dir,
@@ -723,6 +726,7 @@ retry:
 		em->block_start = ins.objectid;
 		em->block_len = ins.offset;
 		em->orig_block_len = ins.offset;
+		em->ram_bytes = async_extent->ram_size;
 		em->bdev = root->fs_info->fs_devices->latest_bdev;
 		em->compress_type = async_extent->compress_type;
 		set_bit(EXTENT_FLAG_PINNED, &em->flags);
@@ -731,10 +735,7 @@ retry:
 
 		while (1) {
 			write_lock(&em_tree->lock);
-			ret = add_extent_mapping(em_tree, em);
-			if (!ret)
-				list_move(&em->list,
-					  &em_tree->modified_extents);
+			ret = add_extent_mapping(em_tree, em, 1);
 			write_unlock(&em_tree->lock);
 			if (ret != -EEXIST) {
 				free_extent_map(em);
@@ -922,7 +923,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
 		}
 
 		em = alloc_extent_map();
-		BUG_ON(!em); /* -ENOMEM */
+		if (!em)
+			goto out_reserve;
 		em->start = start;
 		em->orig_start = em->start;
 		ram_size = ins.offset;
@@ -933,16 +935,14 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
 		em->block_start = ins.objectid;
 		em->block_len = ins.offset;
 		em->orig_block_len = ins.offset;
+		em->ram_bytes = ram_size;
 		em->bdev = root->fs_info->fs_devices->latest_bdev;
 		set_bit(EXTENT_FLAG_PINNED, &em->flags);
 		em->generation = -1;
 
 		while (1) {
 			write_lock(&em_tree->lock);
-			ret = add_extent_mapping(em_tree, em);
-			if (!ret)
-				list_move(&em->list,
-					  &em_tree->modified_extents);
+			ret = add_extent_mapping(em_tree, em, 1);
 			write_unlock(&em_tree->lock);
 			if (ret != -EEXIST) {
 				free_extent_map(em);
@@ -951,11 +951,14 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
 			btrfs_drop_extent_cache(inode, start,
 						start + ram_size - 1, 0);
 		}
+		if (ret)
+			goto out_reserve;
 
 		cur_alloc_size = ins.offset;
 		ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
 					       ram_size, cur_alloc_size, 0);
-		BUG_ON(ret); /* -ENOMEM */
+		if (ret)
+			goto out_reserve;
 
 		if (root->root_key.objectid ==
 		    BTRFS_DATA_RELOC_TREE_OBJECTID) {
@@ -963,7 +966,7 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
 						      cur_alloc_size);
 			if (ret) {
 				btrfs_abort_transaction(trans, root, ret);
-				goto out_unlock;
+				goto out_reserve;
 			}
 		}
 
@@ -992,6 +995,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
 out:
 	return ret;
 
+out_reserve:
+	btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
 out_unlock:
 	extent_clear_unlock_delalloc(inode,
 		     &BTRFS_I(inode)->io_tree,
@@ -1195,6 +1200,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 	u64 disk_bytenr;
 	u64 num_bytes;
 	u64 disk_num_bytes;
+	u64 ram_bytes;
 	int extent_type;
 	int ret, err;
 	int type;
@@ -1291,6 +1297,7 @@ next_slot:
 				    struct btrfs_file_extent_item);
 		extent_type = btrfs_file_extent_type(leaf, fi);
 
+		ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
 		if (extent_type == BTRFS_FILE_EXTENT_REG ||
 		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
 			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
@@ -1374,6 +1381,7 @@ out_check:
 			em->block_len = num_bytes;
 			em->block_start = disk_bytenr;
 			em->orig_block_len = disk_num_bytes;
+			em->ram_bytes = ram_bytes;
 			em->bdev = root->fs_info->fs_devices->latest_bdev;
 			em->mod_start = em->start;
 			em->mod_len = em->len;
@@ -1382,10 +1390,7 @@ out_check:
 			em->generation = -1;
 			while (1) {
 				write_lock(&em_tree->lock);
-				ret = add_extent_mapping(em_tree, em);
-				if (!ret)
-					list_move(&em->list,
-						  &em_tree->modified_extents);
+				ret = add_extent_mapping(em_tree, em, 1);
 				write_unlock(&em_tree->lock);
 				if (ret != -EEXIST) {
 					free_extent_map(em);
@@ -1526,7 +1531,7 @@ static void btrfs_merge_extent_hook(struct inode *inode,
  * have pending delalloc work to be done.
  */
 static void btrfs_set_bit_hook(struct inode *inode,
-			       struct extent_state *state, int *bits)
+			       struct extent_state *state, unsigned long *bits)
 {
 
 	/*
@@ -1570,7 +1575,8 @@ static void btrfs_set_bit_hook(struct inode *inode,
  * extent_io.c clear_bit_hook, see set_bit_hook for why
  */
 static void btrfs_clear_bit_hook(struct inode *inode,
-				 struct extent_state *state, int *bits)
+				 struct extent_state *state,
+				 unsigned long *bits)
 {
 	/*
 	 * set_bit and clear bit hooks normally require _irqsave/restore
@@ -2794,6 +2800,8 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 	int ret;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	u32 csum = ~(u32)0;
+	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
+	                              DEFAULT_RATELIMIT_BURST);
 
 	if (PageChecked(page)) {
 		ClearPageChecked(page);
@@ -2820,7 +2828,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 	if (ret)
 		goto zeroit;
 
-	csum = btrfs_csum_data(root, kaddr + offset, csum,  end - start + 1);
+	csum = btrfs_csum_data(kaddr + offset, csum,  end - start + 1);
 	btrfs_csum_final(csum, (char *)&csum);
 	if (csum != private)
 		goto zeroit;
@@ -2830,11 +2838,11 @@ good:
 	return 0;
 
 zeroit:
-	printk_ratelimited(KERN_INFO "btrfs csum failed ino %llu off %llu csum %u "
-		       "private %llu\n",
-		       (unsigned long long)btrfs_ino(page->mapping->host),
-		       (unsigned long long)start, csum,
-		       (unsigned long long)private);
+	if (__ratelimit(&_rs))
+		btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u private %llu",
+			(unsigned long long)btrfs_ino(page->mapping->host),
+			(unsigned long long)start, csum,
+			(unsigned long long)private);
 	memset(kaddr + offset, 1, end - start + 1);
 	flush_dcache_page(page);
 	kunmap_atomic(kaddr);
@@ -3020,7 +3028,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
  * We have done the truncate/delete so we can go ahead and remove the orphan
  * item for this particular inode.
  */
-int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
+static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
+			    struct inode *inode)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	int delete_item = 0;
@@ -3115,8 +3124,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
 		 */
 
 		if (found_key.offset == last_objectid) {
-			printk(KERN_ERR "btrfs: Error removing orphan entry, "
-			       "stopping orphan cleanup\n");
+			btrfs_err(root->fs_info,
+				"Error removing orphan entry, stopping orphan cleanup");
 			ret = -EINVAL;
 			goto out;
 		}
@@ -3173,8 +3182,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
 				ret = PTR_ERR(trans);
 				goto out;
 			}
-			printk(KERN_ERR "auto deleting %Lu\n",
-			       found_key.objectid);
+			btrfs_debug(root->fs_info, "auto deleting %Lu",
+				found_key.objectid);
 			ret = btrfs_del_orphan_item(trans, root,
 						    found_key.objectid);
 			BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
@@ -3238,13 +3247,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
 	}
 
 	if (nr_unlink)
-		printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
+		btrfs_debug(root->fs_info, "unlinked %d orphans", nr_unlink);
 	if (nr_truncate)
-		printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
+		btrfs_debug(root->fs_info, "truncated %d orphans", nr_truncate);
 
 out:
 	if (ret)
-		printk(KERN_CRIT "btrfs: could not do orphan cleanup %d\n", ret);
+		btrfs_crit(root->fs_info,
+			"could not do orphan cleanup %d", ret);
 	btrfs_free_path(path);
 	return ret;
 }
@@ -3592,9 +3602,10 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
 	ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
 				  dir_ino, &index);
 	if (ret) {
-		printk(KERN_INFO "btrfs failed to delete reference to %.*s, "
-		       "inode %llu parent %llu\n", name_len, name,
-		       (unsigned long long)ino, (unsigned long long)dir_ino);
+		btrfs_info(root->fs_info,
+			"failed to delete reference to %.*s, inode %llu parent %llu",
+			name_len, name,
+			(unsigned long long)ino, (unsigned long long)dir_ino);
 		btrfs_abort_transaction(trans, root, ret);
 		goto err;
 	}
@@ -3616,6 +3627,8 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
 					   dir, index);
 	if (ret == -ENOENT)
 		ret = 0;
+	else if (ret)
+		btrfs_abort_transaction(trans, root, ret);
 err:
 	btrfs_free_path(path);
 	if (ret)
@@ -3661,7 +3674,7 @@ static int check_path_shared(struct btrfs_root *root,
 		eb = path->nodes[level];
 		if (!btrfs_block_can_be_shared(root, eb))
 			continue;
-		ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len,
+		ret = btrfs_lookup_extent_info(NULL, root, eb->start, level, 1,
 					       &refs, NULL);
 		if (refs > 1)
 			return 1;
@@ -4176,8 +4189,7 @@ search_again:
 				}
 				size =
 				    btrfs_file_extent_calc_inline_size(size);
-				btrfs_truncate_item(trans, root, path,
-						    size, 1);
+				btrfs_truncate_item(root, path, size, 1);
 			} else if (root->ref_cows) {
 				inode_sub_bytes(inode, item_end + 1 -
 						found_key.offset);
@@ -4451,16 +4463,14 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
 			hole_em->block_start = EXTENT_MAP_HOLE;
 			hole_em->block_len = 0;
 			hole_em->orig_block_len = 0;
+			hole_em->ram_bytes = hole_size;
 			hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
 			hole_em->compress_type = BTRFS_COMPRESS_NONE;
 			hole_em->generation = trans->transid;
 
 			while (1) {
 				write_lock(&em_tree->lock);
-				err = add_extent_mapping(em_tree, hole_em);
-				if (!err)
-					list_move(&hole_em->list,
-						  &em_tree->modified_extents);
+				err = add_extent_mapping(em_tree, hole_em, 1);
 				write_unlock(&em_tree->lock);
 				if (err != -EEXIST)
 					break;
@@ -4671,8 +4681,9 @@ void btrfs_evict_inode(struct inode *inode)
 			ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size);
 
 		if (ret) {
-			printk(KERN_WARNING "Could not get space for a "
-			       "delete, will truncate on mount %d\n", ret);
+			btrfs_warn(root->fs_info,
+				"Could not get space for a delete, will truncate on mount %d",
+				ret);
 			btrfs_orphan_del(NULL, inode);
 			btrfs_free_block_rsv(root, rsv);
 			goto no_delete;
@@ -5336,7 +5347,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
  * FIXME, needs more benchmarking...there are no reasons other than performance
  * to keep or drop this code.
  */
-int btrfs_dirty_inode(struct inode *inode)
+static int btrfs_dirty_inode(struct inode *inode)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;
@@ -5978,7 +5989,7 @@ static int merge_extent_mapping(struct extent_map_tree *em_tree,
 		em->block_start += start_diff;
 		em->block_len -= start_diff;
 	}
-	return add_extent_mapping(em_tree, em);
+	return add_extent_mapping(em_tree, em, 0);
 }
 
 static noinline int uncompress_inline(struct btrfs_path *path,
@@ -6152,6 +6163,7 @@ again:
 		goto not_found_em;
 	}
 
+	em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, item);
 	if (found_type == BTRFS_FILE_EXTENT_REG ||
 	    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
 		em->start = extent_start;
@@ -6260,18 +6272,18 @@ not_found_em:
 insert:
 	btrfs_release_path(path);
 	if (em->start > start || extent_map_end(em) <= start) {
-		printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed "
-		       "[%llu %llu]\n", (unsigned long long)em->start,
-		       (unsigned long long)em->len,
-		       (unsigned long long)start,
-		       (unsigned long long)len);
+		btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]",
+			(unsigned long long)em->start,
+			(unsigned long long)em->len,
+			(unsigned long long)start,
+			(unsigned long long)len);
 		err = -EIO;
 		goto out;
 	}
 
 	err = 0;
 	write_lock(&em_tree->lock);
-	ret = add_extent_mapping(em_tree, em);
+	ret = add_extent_mapping(em_tree, em, 0);
 	/* it is possible that someone inserted the extent into the tree
 	 * while we had the lock dropped.  It is also possible that
 	 * an overlapping map exists in the tree
@@ -6483,7 +6495,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
 	}
 
 	em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
-			      ins.offset, ins.offset, 0);
+			      ins.offset, ins.offset, ins.offset, 0);
 	if (IS_ERR(em))
 		goto out;
 
@@ -6503,7 +6515,9 @@ out:
  * block must be cow'd
  */
 static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
-				      struct inode *inode, u64 offset, u64 len)
+				      struct inode *inode, u64 offset, u64 *len,
+				      u64 *orig_start, u64 *orig_block_len,
+				      u64 *ram_bytes)
 {
 	struct btrfs_path *path;
 	int ret;
@@ -6560,8 +6574,12 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
 	disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
 	backref_offset = btrfs_file_extent_offset(leaf, fi);
 
+	*orig_start = key.offset - backref_offset;
+	*orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
+	*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
+
 	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
-	if (extent_end < offset + len) {
+	if (extent_end < offset + *len) {
 		/* extent doesn't include our full range, must cow */
 		goto out;
 	}
@@ -6585,13 +6603,14 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
 	 */
 	disk_bytenr += backref_offset;
 	disk_bytenr += offset - key.offset;
-	num_bytes = min(offset + len, extent_end) - offset;
+	num_bytes = min(offset + *len, extent_end) - offset;
 	if (csum_exist_in_range(root, disk_bytenr, num_bytes))
 				goto out;
 	/*
 	 * all of the above have passed, it is safe to overwrite this extent
 	 * without cow
 	 */
+	*len = num_bytes;
 	ret = 1;
 out:
 	btrfs_free_path(path);
@@ -6662,7 +6681,8 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
 static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
 					   u64 len, u64 orig_start,
 					   u64 block_start, u64 block_len,
-					   u64 orig_block_len, int type)
+					   u64 orig_block_len, u64 ram_bytes,
+					   int type)
 {
 	struct extent_map_tree *em_tree;
 	struct extent_map *em;
@@ -6683,6 +6703,7 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
 	em->block_start = block_start;
 	em->bdev = root->fs_info->fs_devices->latest_bdev;
 	em->orig_block_len = orig_block_len;
+	em->ram_bytes = ram_bytes;
 	em->generation = -1;
 	set_bit(EXTENT_FLAG_PINNED, &em->flags);
 	if (type == BTRFS_ORDERED_PREALLOC)
@@ -6692,10 +6713,7 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
 		btrfs_drop_extent_cache(inode, em->start,
 				em->start + em->len - 1, 0);
 		write_lock(&em_tree->lock);
-		ret = add_extent_mapping(em_tree, em);
-		if (!ret)
-			list_move(&em->list,
-				  &em_tree->modified_extents);
+		ret = add_extent_mapping(em_tree, em, 1);
 		write_unlock(&em_tree->lock);
 	} while (ret == -EEXIST);
 
@@ -6790,7 +6808,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 	     em->block_start != EXTENT_MAP_HOLE)) {
 		int type;
 		int ret;
-		u64 block_start;
+		u64 block_start, orig_start, orig_block_len, ram_bytes;
 
 		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
 			type = BTRFS_ORDERED_PREALLOC;
@@ -6808,16 +6826,15 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 		if (IS_ERR(trans))
 			goto must_cow;
 
-		if (can_nocow_odirect(trans, inode, start, len) == 1) {
-			u64 orig_start = em->orig_start;
-			u64 orig_block_len = em->orig_block_len;
-
+		if (can_nocow_odirect(trans, inode, start, &len, &orig_start,
+				      &orig_block_len, &ram_bytes) == 1) {
 			if (type == BTRFS_ORDERED_PREALLOC) {
 				free_extent_map(em);
 				em = create_pinned_em(inode, start, len,
 						       orig_start,
 						       block_start, len,
-						       orig_block_len, type);
+						       orig_block_len,
+						       ram_bytes, type);
 				if (IS_ERR(em)) {
 					btrfs_end_transaction(trans, root);
 					goto unlock_err;
@@ -6937,7 +6954,7 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
 				goto failed;
 			local_irq_save(flags);
 			kaddr = kmap_atomic(page);
-			csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
+			csum = btrfs_csum_data(kaddr + bvec->bv_offset,
 					       csum, bvec->bv_len);
 			btrfs_csum_final(csum, (char *)&csum);
 			kunmap_atomic(kaddr);
@@ -6946,11 +6963,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
 			flush_dcache_page(bvec->bv_page);
 			if (csum != private) {
 failed:
-				printk(KERN_ERR "btrfs csum failed ino %llu off"
-				      " %llu csum %u private %u\n",
-				      (unsigned long long)btrfs_ino(inode),
-				      (unsigned long long)start,
-				      csum, (unsigned)private);
+				btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u private %u",
+					(unsigned long long)btrfs_ino(inode),
+					(unsigned long long)start,
+					csum, (unsigned)private);
 				err = -EIO;
 			}
 		}
@@ -7426,8 +7442,8 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
 	return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
 }
 
-int btrfs_writepages(struct address_space *mapping,
-		     struct writeback_control *wbc)
+static int btrfs_writepages(struct address_space *mapping,
+			    struct writeback_control *wbc)
 {
 	struct extent_io_tree *tree;
 
@@ -7942,8 +7958,8 @@ void btrfs_destroy_inode(struct inode *inode)
 
 	if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
 		     &BTRFS_I(inode)->runtime_flags)) {
-		printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
-		       (unsigned long long)btrfs_ino(inode));
+		btrfs_info(root->fs_info, "inode %llu still on the orphan list",
+			(unsigned long long)btrfs_ino(inode));
 		atomic_dec(&root->orphan_inodes);
 	}
 
@@ -7952,10 +7968,9 @@ void btrfs_destroy_inode(struct inode *inode)
 		if (!ordered)
 			break;
 		else {
-			printk(KERN_ERR "btrfs found ordered "
-			       "extent %llu %llu on inode cleanup\n",
-			       (unsigned long long)ordered->file_offset,
-			       (unsigned long long)ordered->len);
+			btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup",
+				(unsigned long long)ordered->file_offset,
+				(unsigned long long)ordered->len);
 			btrfs_remove_ordered_extent(inode, ordered);
 			btrfs_put_ordered_extent(ordered);
 			btrfs_put_ordered_extent(ordered);
@@ -8572,16 +8587,14 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 		em->block_start = ins.objectid;
 		em->block_len = ins.offset;
 		em->orig_block_len = ins.offset;
+		em->ram_bytes = ins.offset;
 		em->bdev = root->fs_info->fs_devices->latest_bdev;
 		set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
 		em->generation = trans->transid;
 
 		while (1) {
 			write_lock(&em_tree->lock);
-			ret = add_extent_mapping(em_tree, em);
-			if (!ret)
-				list_move(&em->list,
-					  &em_tree->modified_extents);
+			ret = add_extent_mapping(em_tree, em, 1);
 			write_unlock(&em_tree->lock);
 			if (ret != -EEXIST)
 				break;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2c02310..0de4a2f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -723,7 +723,9 @@ static noinline int btrfs_mksubvol(struct path *parent,
 	struct dentry *dentry;
 	int error;
 
-	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+	error = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);
+	if (error == -EINTR)
+		return error;
 
 	dentry = lookup_one_len(name, parent->dentry, namelen);
 	error = PTR_ERR(dentry);
@@ -1152,8 +1154,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
 	u64 new_align = ~((u64)128 * 1024 - 1);
 	struct page **pages = NULL;
 
-	if (extent_thresh == 0)
-		extent_thresh = 256 * 1024;
+	if (isize == 0)
+		return 0;
+
+	if (range->start >= isize)
+		return -EINVAL;
 
 	if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
 		if (range->compress_type > BTRFS_COMPRESS_TYPES)
@@ -1162,8 +1167,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
 			compress_type = range->compress_type;
 	}
 
-	if (isize == 0)
-		return 0;
+	if (extent_thresh == 0)
+		extent_thresh = 256 * 1024;
 
 	/*
 	 * if we were not given a file, allocate a readahead
@@ -2086,7 +2091,9 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 	if (err)
 		goto out;
 
-	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+	err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);
+	if (err == -EINTR)
+		goto out;
 	dentry = lookup_one_len(vol_args->name, parent, namelen);
 	if (IS_ERR(dentry)) {
 		err = PTR_ERR(dentry);
@@ -2425,7 +2432,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
 
 	mutex_lock(&fs_devices->device_list_mutex);
 	dev = btrfs_find_device(root->fs_info, di_args->devid, s_uuid, NULL);
-	mutex_unlock(&fs_devices->device_list_mutex);
 
 	if (!dev) {
 		ret = -ENODEV;
@@ -2449,6 +2455,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
 	}
 
 out:
+	mutex_unlock(&fs_devices->device_list_mutex);
 	if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
 		ret = -EFAULT;
 
@@ -3003,7 +3010,7 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
 	}
 }
 
-long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
+static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
 {
 	struct btrfs_ioctl_space_args space_args;
 	struct btrfs_ioctl_space_info space;
@@ -3693,12 +3700,11 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
 		goto drop_write;
 	}
 
-	if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) {
-		trans = btrfs_start_transaction(root, 2);
-		if (IS_ERR(trans)) {
-			ret = PTR_ERR(trans);
-			goto out;
-		}
+	down_write(&root->fs_info->subvol_sem);
+	trans = btrfs_start_transaction(root->fs_info->tree_root, 2);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		goto out;
 	}
 
 	switch (sa->cmd) {
@@ -3708,9 +3714,6 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
 	case BTRFS_QUOTA_CTL_DISABLE:
 		ret = btrfs_quota_disable(trans, root->fs_info);
 		break;
-	case BTRFS_QUOTA_CTL_RESCAN:
-		ret = btrfs_quota_rescan(root->fs_info);
-		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -3719,13 +3722,12 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
 	if (copy_to_user(arg, sa, sizeof(*sa)))
 		ret = -EFAULT;
 
-	if (trans) {
-		err = btrfs_commit_transaction(trans, root);
-		if (err && !ret)
-			ret = err;
-	}
+	err = btrfs_commit_transaction(trans, root->fs_info->tree_root);
+	if (err && !ret)
+		ret = err;
 out:
 	kfree(sa);
+	up_write(&root->fs_info->subvol_sem);
 drop_write:
 	mnt_drop_write_file(file);
 	return ret;
@@ -3877,6 +3879,64 @@ drop_write:
 	return ret;
 }
 
+static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
+{
+	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+	struct btrfs_ioctl_quota_rescan_args *qsa;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	ret = mnt_want_write_file(file);
+	if (ret)
+		return ret;
+
+	qsa = memdup_user(arg, sizeof(*qsa));
+	if (IS_ERR(qsa)) {
+		ret = PTR_ERR(qsa);
+		goto drop_write;
+	}
+
+	if (qsa->flags) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = btrfs_qgroup_rescan(root->fs_info);
+
+out:
+	kfree(qsa);
+drop_write:
+	mnt_drop_write_file(file);
+	return ret;
+}
+
+static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
+{
+	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+	struct btrfs_ioctl_quota_rescan_args *qsa;
+	int ret = 0;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	qsa = kzalloc(sizeof(*qsa), GFP_NOFS);
+	if (!qsa)
+		return -ENOMEM;
+
+	if (root->fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+		qsa->flags = 1;
+		qsa->progress = root->fs_info->qgroup_rescan_progress.objectid;
+	}
+
+	if (copy_to_user(arg, qsa, sizeof(*qsa)))
+		ret = -EFAULT;
+
+	kfree(qsa);
+	return ret;
+}
+
 static long btrfs_ioctl_set_received_subvol(struct file *file,
 					    void __user *arg)
 {
@@ -4115,6 +4175,10 @@ long btrfs_ioctl(struct file *file, unsigned int
 		return btrfs_ioctl_qgroup_create(file, argp);
 	case BTRFS_IOC_QGROUP_LIMIT:
 		return btrfs_ioctl_qgroup_limit(file, argp);
+	case BTRFS_IOC_QUOTA_RESCAN:
+		return btrfs_ioctl_quota_rescan(file, argp);
+	case BTRFS_IOC_QUOTA_RESCAN_STATUS:
+		return btrfs_ioctl_quota_rescan_status(file, argp);
 	case BTRFS_IOC_DEV_REPLACE:
 		return btrfs_ioctl_dev_replace(root, argp);
 	case BTRFS_IOC_GET_FSLABEL:
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index e95df43..01277b8 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -24,7 +24,7 @@
 #include "extent_io.h"
 #include "locking.h"
 
-void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
+static void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
 
 /*
  * if we currently have a spinning reader or writer lock
@@ -264,7 +264,7 @@ void btrfs_assert_tree_locked(struct extent_buffer *eb)
 	BUG_ON(!atomic_read(&eb->write_locks));
 }
 
-void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
+static void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
 {
 	BUG_ON(!atomic_read(&eb->read_locks));
 }
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 005c45d..1ddd728 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -986,7 +986,7 @@ out:
  * be reclaimed before their checksum is actually put into the btree
  */
 int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
-			   u32 *sum)
+			   u32 *sum, int len)
 {
 	struct btrfs_ordered_sum *ordered_sum;
 	struct btrfs_sector_sum *sector_sums;
@@ -995,22 +995,28 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
 	unsigned long num_sectors;
 	unsigned long i;
 	u32 sectorsize = BTRFS_I(inode)->root->sectorsize;
-	int ret = 1;
+	int index = 0;
 
 	ordered = btrfs_lookup_ordered_extent(inode, offset);
 	if (!ordered)
-		return 1;
+		return 0;
 
 	spin_lock_irq(&tree->lock);
 	list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
-		if (disk_bytenr >= ordered_sum->bytenr) {
-			num_sectors = ordered_sum->len / sectorsize;
-			sector_sums = ordered_sum->sums;
-			for (i = 0; i < num_sectors; i++) {
+		if (disk_bytenr >= ordered_sum->bytenr &&
+		    disk_bytenr < ordered_sum->bytenr + ordered_sum->len) {
+			i = (disk_bytenr - ordered_sum->bytenr) >>
+			    inode->i_sb->s_blocksize_bits;
+			sector_sums = ordered_sum->sums + i;
+			num_sectors = ordered_sum->len >>
+				      inode->i_sb->s_blocksize_bits;
+			for (; i < num_sectors; i++) {
 				if (sector_sums[i].bytenr == disk_bytenr) {
-					*sum = sector_sums[i].sum;
-					ret = 0;
-					goto out;
+					sum[index] = sector_sums[i].sum;
+					index++;
+					if (index == len)
+						goto out;
+					disk_bytenr += sectorsize;
 				}
 			}
 		}
@@ -1018,7 +1024,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
 out:
 	spin_unlock_irq(&tree->lock);
 	btrfs_put_ordered_extent(ordered);
-	return ret;
+	return index;
 }
 
 
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 8eadfe4..58b0e3b 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -196,7 +196,8 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
 							u64 len);
 int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
 				struct btrfs_ordered_extent *ordered);
-int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
+int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
+			   u32 *sum, int len);
 int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root, int wait);
 void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 920957e..dc0024f 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -176,7 +176,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
 
 	nr = btrfs_header_nritems(l);
 
-	printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n",
+	btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d",
 		(unsigned long long)btrfs_header_bytenr(l), nr,
 		btrfs_leaf_free_space(root, l));
 	for (i = 0 ; i < nr ; i++) {
@@ -319,10 +319,9 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
 		btrfs_print_leaf(root, c);
 		return;
 	}
-	printk(KERN_INFO "node %llu level %d total ptrs %d free spc %u\n",
-	       (unsigned long long)btrfs_header_bytenr(c),
-	      level, nr,
-	       (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
+	btrfs_info(root->fs_info, "node %llu level %d total ptrs %d free spc %u",
+		(unsigned long long)btrfs_header_bytenr(c),
+		level, nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
 	for (i = 0; i < nr; i++) {
 		btrfs_node_key_to_cpu(c, &key, i);
 		printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n",
diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h
index da75efe..7faddfa 100644
--- a/fs/btrfs/print-tree.h
+++ b/fs/btrfs/print-tree.h
@@ -19,5 +19,5 @@
 #ifndef __PRINT_TREE_
 #define __PRINT_TREE_
 void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l);
-void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *t);
+void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c);
 #endif
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b44124d..9d49c58 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -31,13 +31,13 @@
 #include "locking.h"
 #include "ulist.h"
 #include "backref.h"
+#include "extent_io.h"
 
 /* TODO XXX FIXME
  *  - subvol delete -> delete when ref goes to 0? delete limits also?
  *  - reorganize keys
  *  - compressed
  *  - sync
- *  - rescan
  *  - copy also limits on subvol creation
  *  - limit
  *  - caches fuer ulists
@@ -98,7 +98,15 @@ struct btrfs_qgroup_list {
 	struct btrfs_qgroup *member;
 };
 
-/* must be called with qgroup_lock held */
+struct qgroup_rescan {
+	struct btrfs_work	work;
+	struct btrfs_fs_info	*fs_info;
+};
+
+static void qgroup_rescan_start(struct btrfs_fs_info *fs_info,
+				struct qgroup_rescan *qscan);
+
+/* must be called with qgroup_ioctl_lock held */
 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
 					   u64 qgroupid)
 {
@@ -298,7 +306,20 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
 			}
 			fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
 									  ptr);
-			/* FIXME read scan element */
+			fs_info->qgroup_rescan_progress.objectid =
+					btrfs_qgroup_status_rescan(l, ptr);
+			if (fs_info->qgroup_flags &
+			    BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+				struct qgroup_rescan *qscan =
+					kmalloc(sizeof(*qscan), GFP_NOFS);
+				if (!qscan) {
+					ret = -ENOMEM;
+					goto out;
+				}
+				fs_info->qgroup_rescan_progress.type = 0;
+				fs_info->qgroup_rescan_progress.offset = 0;
+				qgroup_rescan_start(fs_info, qscan);
+			}
 			goto next1;
 		}
 
@@ -420,8 +441,6 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
 		qgroup = rb_entry(n, struct btrfs_qgroup, node);
 		rb_erase(n, &fs_info->qgroup_tree);
 
-		WARN_ON(!list_empty(&qgroup->dirty));
-
 		while (!list_empty(&qgroup->groups)) {
 			list = list_first_entry(&qgroup->groups,
 						struct btrfs_qgroup_list,
@@ -721,7 +740,8 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
 	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
 	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
 	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
-	/* XXX scan */
+	btrfs_set_qgroup_status_rescan(l, ptr,
+				fs_info->qgroup_rescan_progress.objectid);
 
 	btrfs_mark_buffer_dirty(l);
 
@@ -783,19 +803,21 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 		       struct btrfs_fs_info *fs_info)
 {
 	struct btrfs_root *quota_root;
+	struct btrfs_root *tree_root = fs_info->tree_root;
 	struct btrfs_path *path = NULL;
 	struct btrfs_qgroup_status_item *ptr;
 	struct extent_buffer *leaf;
 	struct btrfs_key key;
+	struct btrfs_key found_key;
+	struct btrfs_qgroup *qgroup = NULL;
 	int ret = 0;
+	int slot;
 
-	spin_lock(&fs_info->qgroup_lock);
+	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	if (fs_info->quota_root) {
 		fs_info->pending_quota_state = 1;
-		spin_unlock(&fs_info->qgroup_lock);
 		goto out;
 	}
-	spin_unlock(&fs_info->qgroup_lock);
 
 	/*
 	 * initially create the quota tree
@@ -830,10 +852,57 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
 				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
-	btrfs_set_qgroup_status_scan(leaf, ptr, 0);
+	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
 
 	btrfs_mark_buffer_dirty(leaf);
 
+	key.objectid = 0;
+	key.type = BTRFS_ROOT_REF_KEY;
+	key.offset = 0;
+
+	btrfs_release_path(path);
+	ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
+	if (ret > 0)
+		goto out_add_root;
+	if (ret < 0)
+		goto out_free_path;
+
+
+	while (1) {
+		slot = path->slots[0];
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &found_key, slot);
+
+		if (found_key.type == BTRFS_ROOT_REF_KEY) {
+			ret = add_qgroup_item(trans, quota_root,
+					      found_key.offset);
+			if (ret)
+				goto out_free_path;
+
+			qgroup = add_qgroup_rb(fs_info, found_key.offset);
+			if (IS_ERR(qgroup)) {
+				ret = PTR_ERR(qgroup);
+				goto out_free_path;
+			}
+		}
+		ret = btrfs_next_item(tree_root, path);
+		if (ret < 0)
+			goto out_free_path;
+		if (ret)
+			break;
+	}
+
+out_add_root:
+	btrfs_release_path(path);
+	ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
+	if (ret)
+		goto out_free_path;
+
+	qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
+	if (IS_ERR(qgroup)) {
+		ret = PTR_ERR(qgroup);
+		goto out_free_path;
+	}
 	spin_lock(&fs_info->qgroup_lock);
 	fs_info->quota_root = quota_root;
 	fs_info->pending_quota_state = 1;
@@ -847,6 +916,7 @@ out_free_root:
 		kfree(quota_root);
 	}
 out:
+	mutex_unlock(&fs_info->qgroup_ioctl_lock);
 	return ret;
 }
 
@@ -857,11 +927,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
 	struct btrfs_root *quota_root;
 	int ret = 0;
 
+	mutex_lock(&fs_info->qgroup_ioctl_lock);
+	if (!fs_info->quota_root)
+		goto out;
 	spin_lock(&fs_info->qgroup_lock);
-	if (!fs_info->quota_root) {
-		spin_unlock(&fs_info->qgroup_lock);
-		return 0;
-	}
 	fs_info->quota_enabled = 0;
 	fs_info->pending_quota_state = 0;
 	quota_root = fs_info->quota_root;
@@ -869,8 +938,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
 	btrfs_free_qgroup_config(fs_info);
 	spin_unlock(&fs_info->qgroup_lock);
 
-	if (!quota_root)
-		return -EINVAL;
+	if (!quota_root) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	ret = btrfs_clean_quota_tree(trans, quota_root);
 	if (ret)
@@ -891,39 +962,62 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
 	free_extent_buffer(quota_root->commit_root);
 	kfree(quota_root);
 out:
+	mutex_unlock(&fs_info->qgroup_ioctl_lock);
 	return ret;
 }
 
-int btrfs_quota_rescan(struct btrfs_fs_info *fs_info)
+static void qgroup_dirty(struct btrfs_fs_info *fs_info,
+			 struct btrfs_qgroup *qgroup)
 {
-	/* FIXME */
-	return 0;
+	if (list_empty(&qgroup->dirty))
+		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
 }
 
 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
 {
 	struct btrfs_root *quota_root;
+	struct btrfs_qgroup *parent;
+	struct btrfs_qgroup *member;
+	struct btrfs_qgroup_list *list;
 	int ret = 0;
 
+	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	quota_root = fs_info->quota_root;
-	if (!quota_root)
-		return -EINVAL;
+	if (!quota_root) {
+		ret = -EINVAL;
+		goto out;
+	}
+	member = find_qgroup_rb(fs_info, src);
+	parent = find_qgroup_rb(fs_info, dst);
+	if (!member || !parent) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* check if such qgroup relation exist firstly */
+	list_for_each_entry(list, &member->groups, next_group) {
+		if (list->group == parent) {
+			ret = -EEXIST;
+			goto out;
+		}
+	}
 
 	ret = add_qgroup_relation_item(trans, quota_root, src, dst);
 	if (ret)
-		return ret;
+		goto out;
 
 	ret = add_qgroup_relation_item(trans, quota_root, dst, src);
 	if (ret) {
 		del_qgroup_relation_item(trans, quota_root, src, dst);
-		return ret;
+		goto out;
 	}
 
 	spin_lock(&fs_info->qgroup_lock);
 	ret = add_relation_rb(quota_root->fs_info, src, dst);
 	spin_unlock(&fs_info->qgroup_lock);
-
+out:
+	mutex_unlock(&fs_info->qgroup_ioctl_lock);
 	return ret;
 }
 
@@ -931,13 +1025,34 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
 {
 	struct btrfs_root *quota_root;
+	struct btrfs_qgroup *parent;
+	struct btrfs_qgroup *member;
+	struct btrfs_qgroup_list *list;
 	int ret = 0;
 	int err;
 
+	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	quota_root = fs_info->quota_root;
-	if (!quota_root)
-		return -EINVAL;
+	if (!quota_root) {
+		ret = -EINVAL;
+		goto out;
+	}
 
+	member = find_qgroup_rb(fs_info, src);
+	parent = find_qgroup_rb(fs_info, dst);
+	if (!member || !parent) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* check if such qgroup relation exist firstly */
+	list_for_each_entry(list, &member->groups, next_group) {
+		if (list->group == parent)
+			goto exist;
+	}
+	ret = -ENOENT;
+	goto out;
+exist:
 	ret = del_qgroup_relation_item(trans, quota_root, src, dst);
 	err = del_qgroup_relation_item(trans, quota_root, dst, src);
 	if (err && !ret)
@@ -945,9 +1060,9 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
 
 	spin_lock(&fs_info->qgroup_lock);
 	del_relation_rb(fs_info, src, dst);
-
 	spin_unlock(&fs_info->qgroup_lock);
-
+out:
+	mutex_unlock(&fs_info->qgroup_ioctl_lock);
 	return ret;
 }
 
@@ -958,11 +1073,21 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
 	struct btrfs_qgroup *qgroup;
 	int ret = 0;
 
+	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	quota_root = fs_info->quota_root;
-	if (!quota_root)
-		return -EINVAL;
+	if (!quota_root) {
+		ret = -EINVAL;
+		goto out;
+	}
+	qgroup = find_qgroup_rb(fs_info, qgroupid);
+	if (qgroup) {
+		ret = -EEXIST;
+		goto out;
+	}
 
 	ret = add_qgroup_item(trans, quota_root, qgroupid);
+	if (ret)
+		goto out;
 
 	spin_lock(&fs_info->qgroup_lock);
 	qgroup = add_qgroup_rb(fs_info, qgroupid);
@@ -970,7 +1095,8 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
 
 	if (IS_ERR(qgroup))
 		ret = PTR_ERR(qgroup);
-
+out:
+	mutex_unlock(&fs_info->qgroup_ioctl_lock);
 	return ret;
 }
 
@@ -981,27 +1107,32 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
 	struct btrfs_qgroup *qgroup;
 	int ret = 0;
 
+	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	quota_root = fs_info->quota_root;
-	if (!quota_root)
-		return -EINVAL;
+	if (!quota_root) {
+		ret = -EINVAL;
+		goto out;
+	}
 
-	/* check if there are no relations to this qgroup */
-	spin_lock(&fs_info->qgroup_lock);
 	qgroup = find_qgroup_rb(fs_info, qgroupid);
-	if (qgroup) {
-		if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) {
-			spin_unlock(&fs_info->qgroup_lock);
-			return -EBUSY;
+	if (!qgroup) {
+		ret = -ENOENT;
+		goto out;
+	} else {
+		/* check if there are no relations to this qgroup */
+		if (!list_empty(&qgroup->groups) ||
+		    !list_empty(&qgroup->members)) {
+			ret = -EBUSY;
+			goto out;
 		}
 	}
-	spin_unlock(&fs_info->qgroup_lock);
-
 	ret = del_qgroup_item(trans, quota_root, qgroupid);
 
 	spin_lock(&fs_info->qgroup_lock);
 	del_qgroup_rb(quota_root->fs_info, qgroupid);
 	spin_unlock(&fs_info->qgroup_lock);
-
+out:
+	mutex_unlock(&fs_info->qgroup_ioctl_lock);
 	return ret;
 }
 
@@ -1009,13 +1140,22 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
 		       struct btrfs_fs_info *fs_info, u64 qgroupid,
 		       struct btrfs_qgroup_limit *limit)
 {
-	struct btrfs_root *quota_root = fs_info->quota_root;
+	struct btrfs_root *quota_root;
 	struct btrfs_qgroup *qgroup;
 	int ret = 0;
 
-	if (!quota_root)
-		return -EINVAL;
+	mutex_lock(&fs_info->qgroup_ioctl_lock);
+	quota_root = fs_info->quota_root;
+	if (!quota_root) {
+		ret = -EINVAL;
+		goto out;
+	}
 
+	qgroup = find_qgroup_rb(fs_info, qgroupid);
+	if (!qgroup) {
+		ret = -ENOENT;
+		goto out;
+	}
 	ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
 				       limit->flags, limit->max_rfer,
 				       limit->max_excl, limit->rsv_rfer,
@@ -1027,31 +1167,17 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
 	}
 
 	spin_lock(&fs_info->qgroup_lock);
-
-	qgroup = find_qgroup_rb(fs_info, qgroupid);
-	if (!qgroup) {
-		ret = -ENOENT;
-		goto unlock;
-	}
 	qgroup->lim_flags = limit->flags;
 	qgroup->max_rfer = limit->max_rfer;
 	qgroup->max_excl = limit->max_excl;
 	qgroup->rsv_rfer = limit->rsv_rfer;
 	qgroup->rsv_excl = limit->rsv_excl;
-
-unlock:
 	spin_unlock(&fs_info->qgroup_lock);
-
+out:
+	mutex_unlock(&fs_info->qgroup_ioctl_lock);
 	return ret;
 }
 
-static void qgroup_dirty(struct btrfs_fs_info *fs_info,
-			 struct btrfs_qgroup *qgroup)
-{
-	if (list_empty(&qgroup->dirty))
-		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
-}
-
 /*
  * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
  * the modification into a list that's later used by btrfs_end_transaction to
@@ -1075,6 +1201,144 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
+static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
+				    struct ulist *roots, struct ulist *tmp,
+				    u64 seq)
+{
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	struct ulist_node *tmp_unode;
+	struct ulist_iterator tmp_uiter;
+	struct btrfs_qgroup *qg;
+	int ret;
+
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(roots, &uiter))) {
+		qg = find_qgroup_rb(fs_info, unode->val);
+		if (!qg)
+			continue;
+
+		ulist_reinit(tmp);
+						/* XXX id not needed */
+		ret = ulist_add(tmp, qg->qgroupid,
+				(u64)(uintptr_t)qg, GFP_ATOMIC);
+		if (ret < 0)
+			return ret;
+		ULIST_ITER_INIT(&tmp_uiter);
+		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
+			struct btrfs_qgroup_list *glist;
+
+			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
+			if (qg->refcnt < seq)
+				qg->refcnt = seq + 1;
+			else
+				++qg->refcnt;
+
+			list_for_each_entry(glist, &qg->groups, next_group) {
+				ret = ulist_add(tmp, glist->group->qgroupid,
+						(u64)(uintptr_t)glist->group,
+						GFP_ATOMIC);
+				if (ret < 0)
+					return ret;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
+				    struct ulist *roots, struct ulist *tmp,
+				    u64 seq, int sgn, u64 num_bytes,
+				    struct btrfs_qgroup *qgroup)
+{
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	struct btrfs_qgroup *qg;
+	struct btrfs_qgroup_list *glist;
+	int ret;
+
+	ulist_reinit(tmp);
+	ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
+	if (ret < 0)
+		return ret;
+
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(tmp, &uiter))) {
+		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
+		if (qg->refcnt < seq) {
+			/* not visited by step 1 */
+			qg->rfer += sgn * num_bytes;
+			qg->rfer_cmpr += sgn * num_bytes;
+			if (roots->nnodes == 0) {
+				qg->excl += sgn * num_bytes;
+				qg->excl_cmpr += sgn * num_bytes;
+			}
+			qgroup_dirty(fs_info, qg);
+		}
+		WARN_ON(qg->tag >= seq);
+		qg->tag = seq;
+
+		list_for_each_entry(glist, &qg->groups, next_group) {
+			ret = ulist_add(tmp, glist->group->qgroupid,
+					(uintptr_t)glist->group, GFP_ATOMIC);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
+				    struct ulist *roots, struct ulist *tmp,
+				    u64 seq, int sgn, u64 num_bytes)
+{
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	struct btrfs_qgroup *qg;
+	struct ulist_node *tmp_unode;
+	struct ulist_iterator tmp_uiter;
+	int ret;
+
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(roots, &uiter))) {
+		qg = find_qgroup_rb(fs_info, unode->val);
+		if (!qg)
+			continue;
+
+		ulist_reinit(tmp);
+		ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
+		if (ret < 0)
+			return ret;
+
+		ULIST_ITER_INIT(&tmp_uiter);
+		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
+			struct btrfs_qgroup_list *glist;
+
+			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
+			if (qg->tag == seq)
+				continue;
+
+			if (qg->refcnt - seq == roots->nnodes) {
+				qg->excl -= sgn * num_bytes;
+				qg->excl_cmpr -= sgn * num_bytes;
+				qgroup_dirty(fs_info, qg);
+			}
+
+			list_for_each_entry(glist, &qg->groups, next_group) {
+				ret = ulist_add(tmp, glist->group->qgroupid,
+						(uintptr_t)glist->group,
+						GFP_ATOMIC);
+				if (ret < 0)
+					return ret;
+			}
+		}
+	}
+
+	return 0;
+}
+
 /*
  * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
  * from the fs. First, all roots referencing the extent are searched, and
@@ -1090,10 +1354,8 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
 	struct btrfs_root *quota_root;
 	u64 ref_root;
 	struct btrfs_qgroup *qgroup;
-	struct ulist_node *unode;
 	struct ulist *roots = NULL;
 	struct ulist *tmp = NULL;
-	struct ulist_iterator uiter;
 	u64 seq;
 	int ret = 0;
 	int sgn;
@@ -1132,9 +1394,11 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
 	case BTRFS_ADD_DELAYED_REF:
 	case BTRFS_ADD_DELAYED_EXTENT:
 		sgn = 1;
+		seq = btrfs_tree_mod_seq_prev(node->seq);
 		break;
 	case BTRFS_DROP_DELAYED_REF:
 		sgn = -1;
+		seq = node->seq;
 		break;
 	case BTRFS_UPDATE_DELAYED_HEAD:
 		return 0;
@@ -1142,20 +1406,37 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
 		BUG();
 	}
 
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
+			mutex_unlock(&fs_info->qgroup_rescan_lock);
+			return 0;
+		}
+	}
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
 	/*
 	 * the delayed ref sequence number we pass depends on the direction of
-	 * the operation. for add operations, we pass (node->seq - 1) to skip
+	 * the operation. for add operations, we pass
+	 * tree_mod_log_prev_seq(node->seq) to skip
 	 * the delayed ref's current sequence number, because we need the state
 	 * of the tree before the add operation. for delete operations, we pass
 	 * (node->seq) to include the delayed ref's current sequence number,
 	 * because we need the state of the tree after the delete operation.
 	 */
-	ret = btrfs_find_all_roots(trans, fs_info, node->bytenr,
-				   sgn > 0 ? node->seq - 1 : node->seq, &roots);
+	ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
 	if (ret < 0)
 		return ret;
 
+	mutex_lock(&fs_info->qgroup_rescan_lock);
 	spin_lock(&fs_info->qgroup_lock);
+	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
+			ret = 0;
+			goto unlock;
+		}
+	}
+
 	quota_root = fs_info->quota_root;
 	if (!quota_root)
 		goto unlock;
@@ -1175,106 +1456,29 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
 	seq = fs_info->qgroup_seq;
 	fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
 
-	ULIST_ITER_INIT(&uiter);
-	while ((unode = ulist_next(roots, &uiter))) {
-		struct ulist_node *tmp_unode;
-		struct ulist_iterator tmp_uiter;
-		struct btrfs_qgroup *qg;
-
-		qg = find_qgroup_rb(fs_info, unode->val);
-		if (!qg)
-			continue;
-
-		ulist_reinit(tmp);
-						/* XXX id not needed */
-		ulist_add(tmp, qg->qgroupid, (u64)(uintptr_t)qg, GFP_ATOMIC);
-		ULIST_ITER_INIT(&tmp_uiter);
-		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
-			struct btrfs_qgroup_list *glist;
-
-			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
-			if (qg->refcnt < seq)
-				qg->refcnt = seq + 1;
-			else
-				++qg->refcnt;
-
-			list_for_each_entry(glist, &qg->groups, next_group) {
-				ulist_add(tmp, glist->group->qgroupid,
-					  (u64)(uintptr_t)glist->group,
-					  GFP_ATOMIC);
-			}
-		}
-	}
+	ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
+	if (ret)
+		goto unlock;
 
 	/*
 	 * step 2: walk from the new root
 	 */
-	ulist_reinit(tmp);
-	ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
-	ULIST_ITER_INIT(&uiter);
-	while ((unode = ulist_next(tmp, &uiter))) {
-		struct btrfs_qgroup *qg;
-		struct btrfs_qgroup_list *glist;
-
-		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
-		if (qg->refcnt < seq) {
-			/* not visited by step 1 */
-			qg->rfer += sgn * node->num_bytes;
-			qg->rfer_cmpr += sgn * node->num_bytes;
-			if (roots->nnodes == 0) {
-				qg->excl += sgn * node->num_bytes;
-				qg->excl_cmpr += sgn * node->num_bytes;
-			}
-			qgroup_dirty(fs_info, qg);
-		}
-		WARN_ON(qg->tag >= seq);
-		qg->tag = seq;
-
-		list_for_each_entry(glist, &qg->groups, next_group) {
-			ulist_add(tmp, glist->group->qgroupid,
-				  (uintptr_t)glist->group, GFP_ATOMIC);
-		}
-	}
+	ret = qgroup_account_ref_step2(fs_info, roots, tmp, seq, sgn,
+				       node->num_bytes, qgroup);
+	if (ret)
+		goto unlock;
 
 	/*
 	 * step 3: walk again from old refs
 	 */
-	ULIST_ITER_INIT(&uiter);
-	while ((unode = ulist_next(roots, &uiter))) {
-		struct btrfs_qgroup *qg;
-		struct ulist_node *tmp_unode;
-		struct ulist_iterator tmp_uiter;
-
-		qg = find_qgroup_rb(fs_info, unode->val);
-		if (!qg)
-			continue;
-
-		ulist_reinit(tmp);
-		ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
-		ULIST_ITER_INIT(&tmp_uiter);
-		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
-			struct btrfs_qgroup_list *glist;
-
-			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
-			if (qg->tag == seq)
-				continue;
-
-			if (qg->refcnt - seq == roots->nnodes) {
-				qg->excl -= sgn * node->num_bytes;
-				qg->excl_cmpr -= sgn * node->num_bytes;
-				qgroup_dirty(fs_info, qg);
-			}
+	ret = qgroup_account_ref_step3(fs_info, roots, tmp, seq, sgn,
+				       node->num_bytes);
+	if (ret)
+		goto unlock;
 
-			list_for_each_entry(glist, &qg->groups, next_group) {
-				ulist_add(tmp, glist->group->qgroupid,
-					  (uintptr_t)glist->group,
-					  GFP_ATOMIC);
-			}
-		}
-	}
-	ret = 0;
 unlock:
 	spin_unlock(&fs_info->qgroup_lock);
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
 	ulist_free(roots);
 	ulist_free(tmp);
 
@@ -1289,10 +1493,14 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_root *quota_root = fs_info->quota_root;
 	int ret = 0;
+	int start_rescan_worker = 0;
 
 	if (!quota_root)
 		goto out;
 
+	if (!fs_info->quota_enabled && fs_info->pending_quota_state)
+		start_rescan_worker = 1;
+
 	fs_info->quota_enabled = fs_info->pending_quota_state;
 
 	spin_lock(&fs_info->qgroup_lock);
@@ -1318,6 +1526,13 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
 	if (ret)
 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 
+	if (!ret && start_rescan_worker) {
+		ret = btrfs_qgroup_rescan(fs_info);
+		if (ret)
+			pr_err("btrfs: start rescan quota failed: %d\n", ret);
+		ret = 0;
+	}
+
 out:
 
 	return ret;
@@ -1338,12 +1553,30 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 	struct btrfs_qgroup *srcgroup;
 	struct btrfs_qgroup *dstgroup;
 	u32 level_size = 0;
+	u64 nums;
 
+	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	if (!fs_info->quota_enabled)
-		return 0;
+		goto out;
 
-	if (!quota_root)
-		return -EINVAL;
+	if (!quota_root) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (inherit) {
+		i_qgroups = (u64 *)(inherit + 1);
+		nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
+		       2 * inherit->num_excl_copies;
+		for (i = 0; i < nums; ++i) {
+			srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
+			if (!srcgroup) {
+				ret = -EINVAL;
+				goto out;
+			}
+			++i_qgroups;
+		}
+	}
 
 	/*
 	 * create a tracking group for the subvol itself
@@ -1470,6 +1703,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 unlock:
 	spin_unlock(&fs_info->qgroup_lock);
 out:
+	mutex_unlock(&fs_info->qgroup_ioctl_lock);
 	return ret;
 }
 
@@ -1514,7 +1748,10 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
 		ret = -ENOMEM;
 		goto out;
 	}
-	ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
+	ret = ulist_add(ulist, qgroup->qgroupid,
+			(uintptr_t)qgroup, GFP_ATOMIC);
+	if (ret < 0)
+		goto out;
 	ULIST_ITER_INIT(&uiter);
 	while ((unode = ulist_next(ulist, &uiter))) {
 		struct btrfs_qgroup *qg;
@@ -1523,25 +1760,27 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
 		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
 
 		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
-		    qg->reserved + qg->rfer + num_bytes >
+		    qg->reserved + (s64)qg->rfer + num_bytes >
 		    qg->max_rfer) {
 			ret = -EDQUOT;
 			goto out;
 		}
 
 		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
-		    qg->reserved + qg->excl + num_bytes >
+		    qg->reserved + (s64)qg->excl + num_bytes >
 		    qg->max_excl) {
 			ret = -EDQUOT;
 			goto out;
 		}
 
 		list_for_each_entry(glist, &qg->groups, next_group) {
-			ulist_add(ulist, glist->group->qgroupid,
-				  (uintptr_t)glist->group, GFP_ATOMIC);
+			ret = ulist_add(ulist, glist->group->qgroupid,
+					(uintptr_t)glist->group, GFP_ATOMIC);
+			if (ret < 0)
+				goto out;
 		}
 	}
-
+	ret = 0;
 	/*
 	 * no limits exceeded, now record the reservation into all qgroups
 	 */
@@ -1570,6 +1809,7 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
 	struct ulist_node *unode;
 	struct ulist_iterator uiter;
 	u64 ref_root = root->root_key.objectid;
+	int ret = 0;
 
 	if (!is_fstree(ref_root))
 		return;
@@ -1592,7 +1832,10 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
 		btrfs_std_error(fs_info, -ENOMEM);
 		goto out;
 	}
-	ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
+	ret = ulist_add(ulist, qgroup->qgroupid,
+			(uintptr_t)qgroup, GFP_ATOMIC);
+	if (ret < 0)
+		goto out;
 	ULIST_ITER_INIT(&uiter);
 	while ((unode = ulist_next(ulist, &uiter))) {
 		struct btrfs_qgroup *qg;
@@ -1603,8 +1846,10 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
 		qg->reserved -= num_bytes;
 
 		list_for_each_entry(glist, &qg->groups, next_group) {
-			ulist_add(ulist, glist->group->qgroupid,
-				  (uintptr_t)glist->group, GFP_ATOMIC);
+			ret = ulist_add(ulist, glist->group->qgroupid,
+					(uintptr_t)glist->group, GFP_ATOMIC);
+			if (ret < 0)
+				goto out;
 		}
 	}
 
@@ -1617,8 +1862,265 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
 {
 	if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
 		return;
-	printk(KERN_ERR "btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %llu\n",
+	pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n",
 		trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
-		trans->delayed_ref_elem.seq);
+		(u32)(trans->delayed_ref_elem.seq >> 32),
+		(u32)trans->delayed_ref_elem.seq);
 	BUG();
 }
+
+/*
+ * returns < 0 on error, 0 when more leafs are to be scanned.
+ * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
+ */
+static int
+qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path,
+		   struct btrfs_trans_handle *trans, struct ulist *tmp,
+		   struct extent_buffer *scratch_leaf)
+{
+	struct btrfs_key found;
+	struct btrfs_fs_info *fs_info = qscan->fs_info;
+	struct ulist *roots = NULL;
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	struct seq_list tree_mod_seq_elem = {};
+	u64 seq;
+	int slot;
+	int ret;
+
+	path->leave_spinning = 1;
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	ret = btrfs_search_slot_for_read(fs_info->extent_root,
+					 &fs_info->qgroup_rescan_progress,
+					 path, 1, 0);
+
+	pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
+		 (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
+		 fs_info->qgroup_rescan_progress.type,
+		 (unsigned long long)fs_info->qgroup_rescan_progress.offset,
+		 ret);
+
+	if (ret) {
+		/*
+		 * The rescan is about to end, we will not be scanning any
+		 * further blocks. We cannot unset the RESCAN flag here, because
+		 * we want to commit the transaction if everything went well.
+		 * To make the live accounting work in this phase, we set our
+		 * scan progress pointer such that every real extent objectid
+		 * will be smaller.
+		 */
+		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
+		btrfs_release_path(path);
+		mutex_unlock(&fs_info->qgroup_rescan_lock);
+		return ret;
+	}
+
+	btrfs_item_key_to_cpu(path->nodes[0], &found,
+			      btrfs_header_nritems(path->nodes[0]) - 1);
+	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
+
+	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+	memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
+	slot = path->slots[0];
+	btrfs_release_path(path);
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
+		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
+		if (found.type != BTRFS_EXTENT_ITEM_KEY)
+			continue;
+		ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
+					   tree_mod_seq_elem.seq, &roots);
+		if (ret < 0)
+			goto out;
+		spin_lock(&fs_info->qgroup_lock);
+		seq = fs_info->qgroup_seq;
+		fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
+
+		ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
+		if (ret) {
+			spin_unlock(&fs_info->qgroup_lock);
+			ulist_free(roots);
+			goto out;
+		}
+
+		/*
+		 * step2 of btrfs_qgroup_account_ref works from a single root,
+		 * we're doing all at once here.
+		 */
+		ulist_reinit(tmp);
+		ULIST_ITER_INIT(&uiter);
+		while ((unode = ulist_next(roots, &uiter))) {
+			struct btrfs_qgroup *qg;
+
+			qg = find_qgroup_rb(fs_info, unode->val);
+			if (!qg)
+				continue;
+
+			ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
+					GFP_ATOMIC);
+			if (ret < 0) {
+				spin_unlock(&fs_info->qgroup_lock);
+				ulist_free(roots);
+				goto out;
+			}
+		}
+
+		/* this loop is similar to step 2 of btrfs_qgroup_account_ref */
+		ULIST_ITER_INIT(&uiter);
+		while ((unode = ulist_next(tmp, &uiter))) {
+			struct btrfs_qgroup *qg;
+			struct btrfs_qgroup_list *glist;
+
+			qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
+			qg->rfer += found.offset;
+			qg->rfer_cmpr += found.offset;
+			WARN_ON(qg->tag >= seq);
+			if (qg->refcnt - seq == roots->nnodes) {
+				qg->excl += found.offset;
+				qg->excl_cmpr += found.offset;
+			}
+			qgroup_dirty(fs_info, qg);
+
+			list_for_each_entry(glist, &qg->groups, next_group) {
+				ret = ulist_add(tmp, glist->group->qgroupid,
+						(uintptr_t)glist->group,
+						GFP_ATOMIC);
+				if (ret < 0) {
+					spin_unlock(&fs_info->qgroup_lock);
+					ulist_free(roots);
+					goto out;
+				}
+			}
+		}
+
+		spin_unlock(&fs_info->qgroup_lock);
+		ulist_free(roots);
+		ret = 0;
+	}
+
+out:
+	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+
+	return ret;
+}
+
+static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
+{
+	struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan,
+						   work);
+	struct btrfs_path *path;
+	struct btrfs_trans_handle *trans = NULL;
+	struct btrfs_fs_info *fs_info = qscan->fs_info;
+	struct ulist *tmp = NULL;
+	struct extent_buffer *scratch_leaf = NULL;
+	int err = -ENOMEM;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		goto out;
+	tmp = ulist_alloc(GFP_NOFS);
+	if (!tmp)
+		goto out;
+	scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
+	if (!scratch_leaf)
+		goto out;
+
+	err = 0;
+	while (!err) {
+		trans = btrfs_start_transaction(fs_info->fs_root, 0);
+		if (IS_ERR(trans)) {
+			err = PTR_ERR(trans);
+			break;
+		}
+		if (!fs_info->quota_enabled) {
+			err = -EINTR;
+		} else {
+			err = qgroup_rescan_leaf(qscan, path, trans,
+						 tmp, scratch_leaf);
+		}
+		if (err > 0)
+			btrfs_commit_transaction(trans, fs_info->fs_root);
+		else
+			btrfs_end_transaction(trans, fs_info->fs_root);
+	}
+
+out:
+	kfree(scratch_leaf);
+	ulist_free(tmp);
+	btrfs_free_path(path);
+	kfree(qscan);
+
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+
+	if (err == 2 &&
+	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
+		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+	} else if (err < 0) {
+		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+	}
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+	if (err >= 0) {
+		pr_info("btrfs: qgroup scan completed%s\n",
+			err == 2 ? " (inconsistency flag cleared)" : "");
+	} else {
+		pr_err("btrfs: qgroup scan failed with %d\n", err);
+	}
+}
+
+static void
+qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan *qscan)
+{
+	memset(&qscan->work, 0, sizeof(qscan->work));
+	qscan->work.func = btrfs_qgroup_rescan_worker;
+	qscan->fs_info = fs_info;
+
+	pr_info("btrfs: qgroup scan started\n");
+	btrfs_queue_worker(&fs_info->qgroup_rescan_workers, &qscan->work);
+}
+
+int
+btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
+{
+	int ret = 0;
+	struct rb_node *n;
+	struct btrfs_qgroup *qgroup;
+	struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS);
+
+	if (!qscan)
+		return -ENOMEM;
+
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	spin_lock(&fs_info->qgroup_lock);
+	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
+		ret = -EINPROGRESS;
+	else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
+		ret = -EINVAL;
+	if (ret) {
+		spin_unlock(&fs_info->qgroup_lock);
+		mutex_unlock(&fs_info->qgroup_rescan_lock);
+		kfree(qscan);
+		return ret;
+	}
+
+	fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+	memset(&fs_info->qgroup_rescan_progress, 0,
+		sizeof(fs_info->qgroup_rescan_progress));
+
+	/* clear all current qgroup tracking information */
+	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
+		qgroup = rb_entry(n, struct btrfs_qgroup, node);
+		qgroup->rfer = 0;
+		qgroup->rfer_cmpr = 0;
+		qgroup->excl = 0;
+		qgroup->excl_cmpr = 0;
+	}
+	spin_unlock(&fs_info->qgroup_lock);
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+	qgroup_rescan_start(fs_info, qscan);
+
+	return 0;
+}
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 9a79fb7..0740621 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -410,7 +410,7 @@ static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
 /*
  * remove everything in the cache
  */
-void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
+static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
 {
 	struct btrfs_stripe_hash_table *table;
 	unsigned long flags;
@@ -1010,12 +1010,12 @@ static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
  * this will try to merge into existing bios if possible, and returns
  * zero if all went well.
  */
-int rbio_add_io_page(struct btrfs_raid_bio *rbio,
-		     struct bio_list *bio_list,
-		     struct page *page,
-		     int stripe_nr,
-		     unsigned long page_index,
-		     unsigned long bio_max_len)
+static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
+			    struct bio_list *bio_list,
+			    struct page *page,
+			    int stripe_nr,
+			    unsigned long page_index,
+			    unsigned long bio_max_len)
 {
 	struct bio *last = bio_list->tail;
 	u64 last_end = 0;
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 96b93da..1031b69 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -955,10 +955,11 @@ int btrfs_reada_wait(void *handle)
 	while (atomic_read(&rc->elems)) {
 		wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
 				   5 * HZ);
-		dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0);
+		dump_devs(rc->root->fs_info,
+			  atomic_read(&rc->elems) < 10 ? 1 : 0);
 	}
 
-	dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0);
+	dump_devs(rc->root->fs_info, atomic_read(&rc->elems) < 10 ? 1 : 0);
 
 	kref_put(&rc->refcnt, reada_control_release);
 
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b67171e..704a1b8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -326,8 +326,7 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
 	return NULL;
 }
 
-void backref_tree_panic(struct rb_node *rb_node, int errno,
-					  u64 bytenr)
+static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr)
 {
 
 	struct btrfs_fs_info *fs_info = NULL;
@@ -619,10 +618,13 @@ static noinline_for_stack
 int find_inline_backref(struct extent_buffer *leaf, int slot,
 			unsigned long *ptr, unsigned long *end)
 {
+	struct btrfs_key key;
 	struct btrfs_extent_item *ei;
 	struct btrfs_tree_block_info *bi;
 	u32 item_size;
 
+	btrfs_item_key_to_cpu(leaf, &key, slot);
+
 	item_size = btrfs_item_size_nr(leaf, slot);
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
 	if (item_size < sizeof(*ei)) {
@@ -634,13 +636,18 @@ int find_inline_backref(struct extent_buffer *leaf, int slot,
 	WARN_ON(!(btrfs_extent_flags(leaf, ei) &
 		  BTRFS_EXTENT_FLAG_TREE_BLOCK));
 
-	if (item_size <= sizeof(*ei) + sizeof(*bi)) {
+	if (key.type == BTRFS_EXTENT_ITEM_KEY &&
+	    item_size <= sizeof(*ei) + sizeof(*bi)) {
 		WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
 		return 1;
 	}
 
-	bi = (struct btrfs_tree_block_info *)(ei + 1);
-	*ptr = (unsigned long)(bi + 1);
+	if (key.type == BTRFS_EXTENT_ITEM_KEY) {
+		bi = (struct btrfs_tree_block_info *)(ei + 1);
+		*ptr = (unsigned long)(bi + 1);
+	} else {
+		*ptr = (unsigned long)(ei + 1);
+	}
 	*end = (unsigned long)ei + item_size;
 	return 0;
 }
@@ -708,7 +715,7 @@ again:
 	end = 0;
 	ptr = 0;
 	key.objectid = cur->bytenr;
-	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.type = BTRFS_METADATA_ITEM_KEY;
 	key.offset = (u64)-1;
 
 	path1->search_commit_root = 1;
@@ -766,7 +773,8 @@ again:
 				break;
 			}
 
-			if (key.type == BTRFS_EXTENT_ITEM_KEY) {
+			if (key.type == BTRFS_EXTENT_ITEM_KEY ||
+			    key.type == BTRFS_METADATA_ITEM_KEY) {
 				ret = find_inline_backref(eb, path1->slots[0],
 							  &ptr, &end);
 				if (ret)
@@ -1762,7 +1770,11 @@ again:
 
 			eb = read_tree_block(dest, old_bytenr, blocksize,
 					     old_ptr_gen);
-			BUG_ON(!eb);
+			if (!eb || !extent_buffer_uptodate(eb)) {
+				ret = (!eb) ? -ENOMEM : -EIO;
+				free_extent_buffer(eb);
+				return ret;
+			}
 			btrfs_tree_lock(eb);
 			if (cow) {
 				ret = btrfs_cow_block(trans, dest, eb, parent,
@@ -1915,6 +1927,10 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
 		bytenr = btrfs_node_blockptr(eb, path->slots[i]);
 		blocksize = btrfs_level_size(root, i - 1);
 		eb = read_tree_block(root, bytenr, blocksize, ptr_gen);
+		if (!eb || !extent_buffer_uptodate(eb)) {
+			free_extent_buffer(eb);
+			return -EIO;
+		}
 		BUG_ON(btrfs_header_level(eb) != i - 1);
 		path->nodes[i - 1] = eb;
 		path->slots[i - 1] = 0;
@@ -2592,7 +2608,8 @@ static int do_relocation(struct btrfs_trans_handle *trans,
 		blocksize = btrfs_level_size(root, node->level);
 		generation = btrfs_node_ptr_generation(upper->eb, slot);
 		eb = read_tree_block(root, bytenr, blocksize, generation);
-		if (!eb) {
+		if (!eb || !extent_buffer_uptodate(eb)) {
+			free_extent_buffer(eb);
 			err = -EIO;
 			goto next;
 		}
@@ -2753,7 +2770,10 @@ static int get_tree_block_key(struct reloc_control *rc,
 	BUG_ON(block->key_ready);
 	eb = read_tree_block(rc->extent_root, block->bytenr,
 			     block->key.objectid, block->key.offset);
-	BUG_ON(!eb);
+	if (!eb || !extent_buffer_uptodate(eb)) {
+		free_extent_buffer(eb);
+		return -EIO;
+	}
 	WARN_ON(btrfs_header_level(eb) != block->level);
 	if (block->level == 0)
 		btrfs_item_key_to_cpu(eb, &block->key, 0);
@@ -2768,8 +2788,13 @@ static int reada_tree_block(struct reloc_control *rc,
 			    struct tree_block *block)
 {
 	BUG_ON(block->key_ready);
-	readahead_tree_block(rc->extent_root, block->bytenr,
-			     block->key.objectid, block->key.offset);
+	if (block->key.type == BTRFS_METADATA_ITEM_KEY)
+		readahead_tree_block(rc->extent_root, block->bytenr,
+				     block->key.objectid,
+				     rc->extent_root->leafsize);
+	else
+		readahead_tree_block(rc->extent_root, block->bytenr,
+				     block->key.objectid, block->key.offset);
 	return 0;
 }
 
@@ -2850,7 +2875,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	if (!path) {
 		err = -ENOMEM;
-		goto out_path;
+		goto out_free_blocks;
 	}
 
 	rb_node = rb_first(blocks);
@@ -2864,8 +2889,11 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
 	rb_node = rb_first(blocks);
 	while (rb_node) {
 		block = rb_entry(rb_node, struct tree_block, rb_node);
-		if (!block->key_ready)
-			get_tree_block_key(rc, block);
+		if (!block->key_ready) {
+			err = get_tree_block_key(rc, block);
+			if (err)
+				goto out_free_path;
+		}
 		rb_node = rb_next(rb_node);
 	}
 
@@ -2892,8 +2920,9 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
 out:
 	err = finish_pending_nodes(trans, rc, path, err);
 
+out_free_path:
 	btrfs_free_path(path);
-out_path:
+out_free_blocks:
 	free_block_list(blocks);
 	return err;
 }
@@ -2965,7 +2994,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
 	lock_extent(&BTRFS_I(inode)->io_tree, start, end);
 	while (1) {
 		write_lock(&em_tree->lock);
-		ret = add_extent_mapping(em_tree, em);
+		ret = add_extent_mapping(em_tree, em, 0);
 		write_unlock(&em_tree->lock);
 		if (ret != -EEXIST) {
 			free_extent_map(em);
@@ -3176,12 +3205,17 @@ static int add_tree_block(struct reloc_control *rc,
 	eb =  path->nodes[0];
 	item_size = btrfs_item_size_nr(eb, path->slots[0]);
 
-	if (item_size >= sizeof(*ei) + sizeof(*bi)) {
+	if (extent_key->type == BTRFS_METADATA_ITEM_KEY ||
+	    item_size >= sizeof(*ei) + sizeof(*bi)) {
 		ei = btrfs_item_ptr(eb, path->slots[0],
 				struct btrfs_extent_item);
-		bi = (struct btrfs_tree_block_info *)(ei + 1);
+		if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) {
+			bi = (struct btrfs_tree_block_info *)(ei + 1);
+			level = btrfs_tree_block_level(eb, bi);
+		} else {
+			level = (int)extent_key->offset;
+		}
 		generation = btrfs_extent_generation(eb, ei);
-		level = btrfs_tree_block_level(eb, bi);
 	} else {
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
 		u64 ref_owner;
@@ -3210,7 +3244,7 @@ static int add_tree_block(struct reloc_control *rc,
 		return -ENOMEM;
 
 	block->bytenr = extent_key->objectid;
-	block->key.objectid = extent_key->offset;
+	block->key.objectid = rc->extent_root->leafsize;
 	block->key.offset = generation;
 	block->level = level;
 	block->key_ready = 0;
@@ -3252,9 +3286,15 @@ static int __add_tree_block(struct reloc_control *rc,
 	ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
 	if (ret < 0)
 		goto out;
-	BUG_ON(ret);
 
 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+	if (ret > 0) {
+		if (key.objectid == bytenr &&
+		    key.type == BTRFS_METADATA_ITEM_KEY)
+			ret = 0;
+	}
+	BUG_ON(ret);
+
 	ret = add_tree_block(rc, &key, path, blocks);
 out:
 	btrfs_free_path(path);
@@ -3275,7 +3315,8 @@ static int block_use_full_backref(struct reloc_control *rc,
 		return 1;
 
 	ret = btrfs_lookup_extent_info(NULL, rc->extent_root,
-				       eb->start, eb->len, NULL, &flags);
+				       eb->start, btrfs_header_level(eb), 1,
+				       NULL, &flags);
 	BUG_ON(ret);
 
 	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
@@ -3644,12 +3685,25 @@ next:
 			break;
 		}
 
-		if (key.type != BTRFS_EXTENT_ITEM_KEY ||
+		if (key.type != BTRFS_EXTENT_ITEM_KEY &&
+		    key.type != BTRFS_METADATA_ITEM_KEY) {
+			path->slots[0]++;
+			goto next;
+		}
+
+		if (key.type == BTRFS_EXTENT_ITEM_KEY &&
 		    key.objectid + key.offset <= rc->search_start) {
 			path->slots[0]++;
 			goto next;
 		}
 
+		if (key.type == BTRFS_METADATA_ITEM_KEY &&
+		    key.objectid + rc->extent_root->leafsize <=
+		    rc->search_start) {
+			path->slots[0]++;
+			goto next;
+		}
+
 		ret = find_first_extent_bit(&rc->processed_blocks,
 					    key.objectid, &start, &end,
 					    EXTENT_DIRTY, NULL);
@@ -3658,7 +3712,11 @@ next:
 			btrfs_release_path(path);
 			rc->search_start = end + 1;
 		} else {
-			rc->search_start = key.objectid + key.offset;
+			if (key.type == BTRFS_EXTENT_ITEM_KEY)
+				rc->search_start = key.objectid + key.offset;
+			else
+				rc->search_start = key.objectid +
+					rc->extent_root->leafsize;
 			memcpy(extent_key, &key, sizeof(key));
 			return 0;
 		}
@@ -4105,10 +4163,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
 
 	while (1) {
 		mutex_lock(&fs_info->cleaner_mutex);
-
-		btrfs_clean_old_snapshots(fs_info->tree_root);
 		ret = relocate_block_group(rc);
-
 		mutex_unlock(&fs_info->cleaner_mutex);
 		if (ret < 0) {
 			err = ret;
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 668af53..5bf1ed5 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -29,9 +29,8 @@
  * generation numbers as then we know the root was once mounted with an older
  * kernel that was not aware of the root item structure change.
  */
-void btrfs_read_root_item(struct btrfs_root *root,
-			 struct extent_buffer *eb, int slot,
-			 struct btrfs_root_item *item)
+void btrfs_read_root_item(struct extent_buffer *eb, int slot,
+			  struct btrfs_root_item *item)
 {
 	uuid_le uuid;
 	int len;
@@ -104,7 +103,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
 		goto out;
 	}
 	if (item)
-		btrfs_read_root_item(root, l, slot, item);
+		btrfs_read_root_item(l, slot, item);
 	if (key)
 		memcpy(key, &found_key, sizeof(found_key));
 
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 85e072b..f489e24 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1336,7 +1336,6 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
 	int page_num;
 	u8 calculated_csum[BTRFS_CSUM_SIZE];
 	u32 crc = ~(u32)0;
-	struct btrfs_root *root = fs_info->extent_root;
 	void *mapped_buffer;
 
 	WARN_ON(!sblock->pagev[0]->page);
@@ -1365,12 +1364,11 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
 
 	for (page_num = 0;;) {
 		if (page_num == 0 && is_metadata)
-			crc = btrfs_csum_data(root,
+			crc = btrfs_csum_data(
 				((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE,
 				crc, PAGE_SIZE - BTRFS_CSUM_SIZE);
 		else
-			crc = btrfs_csum_data(root, mapped_buffer, crc,
-					      PAGE_SIZE);
+			crc = btrfs_csum_data(mapped_buffer, crc, PAGE_SIZE);
 
 		kunmap_atomic(mapped_buffer);
 		page_num++;
@@ -1657,7 +1655,6 @@ static int scrub_checksum_data(struct scrub_block *sblock)
 	void *buffer;
 	u32 crc = ~(u32)0;
 	int fail = 0;
-	struct btrfs_root *root = sctx->dev_root;
 	u64 len;
 	int index;
 
@@ -1674,7 +1671,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
 	for (;;) {
 		u64 l = min_t(u64, len, PAGE_SIZE);
 
-		crc = btrfs_csum_data(root, buffer, crc, l);
+		crc = btrfs_csum_data(buffer, crc, l);
 		kunmap_atomic(buffer);
 		len -= l;
 		if (len == 0)
@@ -1744,7 +1741,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
 	for (;;) {
 		u64 l = min_t(u64, len, mapped_size);
 
-		crc = btrfs_csum_data(root, p, crc, l);
+		crc = btrfs_csum_data(p, crc, l);
 		kunmap_atomic(mapped_buffer);
 		len -= l;
 		if (len == 0)
@@ -1805,7 +1802,7 @@ static int scrub_checksum_super(struct scrub_block *sblock)
 	for (;;) {
 		u64 l = min_t(u64, len, mapped_size);
 
-		crc = btrfs_csum_data(root, p, crc, l);
+		crc = btrfs_csum_data(p, crc, l);
 		kunmap_atomic(mapped_buffer);
 		len -= l;
 		if (len == 0)
@@ -2236,12 +2233,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	u64 flags;
 	int ret;
 	int slot;
-	int i;
 	u64 nstripes;
 	struct extent_buffer *l;
 	struct btrfs_key key;
 	u64 physical;
 	u64 logical;
+	u64 logic_end;
 	u64 generation;
 	int mirror_num;
 	struct reada_control *reada1;
@@ -2255,6 +2252,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	u64 extent_len;
 	struct btrfs_device *extent_dev;
 	int extent_mirror_num;
+	int stop_loop;
 
 	if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
 			 BTRFS_BLOCK_GROUP_RAID6)) {
@@ -2315,8 +2313,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	key_start.type = BTRFS_EXTENT_ITEM_KEY;
 	key_start.offset = (u64)0;
 	key_end.objectid = base + offset + nstripes * increment;
-	key_end.type = BTRFS_EXTENT_ITEM_KEY;
-	key_end.offset = (u64)0;
+	key_end.type = BTRFS_METADATA_ITEM_KEY;
+	key_end.offset = (u64)-1;
 	reada1 = btrfs_reada_add(root, &key_start, &key_end);
 
 	key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
@@ -2354,8 +2352,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	 */
 	logical = base + offset;
 	physical = map->stripes[num].physical;
+	logic_end = logical + increment * nstripes;
 	ret = 0;
-	for (i = 0; i < nstripes; ++i) {
+	while (logical < logic_end) {
 		/*
 		 * canceled?
 		 */
@@ -2391,19 +2390,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 			wake_up(&fs_info->scrub_pause_wait);
 		}
 
-		ret = btrfs_lookup_csums_range(csum_root, logical,
-					       logical + map->stripe_len - 1,
-					       &sctx->csum_list, 1);
-		if (ret)
-			goto out;
-
 		key.objectid = logical;
 		key.type = BTRFS_EXTENT_ITEM_KEY;
-		key.offset = (u64)0;
+		key.offset = (u64)-1;
 
 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 		if (ret < 0)
 			goto out;
+
 		if (ret > 0) {
 			ret = btrfs_previous_item(root, path, 0,
 						  BTRFS_EXTENT_ITEM_KEY);
@@ -2420,7 +2414,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 			}
 		}
 
+		stop_loop = 0;
 		while (1) {
+			u64 bytes;
+
 			l = path->nodes[0];
 			slot = path->slots[0];
 			if (slot >= btrfs_header_nritems(l)) {
@@ -2430,19 +2427,30 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 				if (ret < 0)
 					goto out;
 
+				stop_loop = 1;
 				break;
 			}
 			btrfs_item_key_to_cpu(l, &key, slot);
 
-			if (key.objectid + key.offset <= logical)
-				goto next;
+			if (key.type == BTRFS_METADATA_ITEM_KEY)
+				bytes = root->leafsize;
+			else
+				bytes = key.offset;
 
-			if (key.objectid >= logical + map->stripe_len)
-				break;
+			if (key.objectid + bytes <= logical)
+				goto next;
 
-			if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
+			if (key.type != BTRFS_EXTENT_ITEM_KEY &&
+			    key.type != BTRFS_METADATA_ITEM_KEY)
 				goto next;
 
+			if (key.objectid >= logical + map->stripe_len) {
+				/* out of this device extent */
+				if (key.objectid >= logic_end)
+					stop_loop = 1;
+				break;
+			}
+
 			extent = btrfs_item_ptr(l, slot,
 						struct btrfs_extent_item);
 			flags = btrfs_extent_flags(l, extent);
@@ -2458,22 +2466,24 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 				goto next;
 			}
 
+again:
+			extent_logical = key.objectid;
+			extent_len = bytes;
+
 			/*
 			 * trim extent to this stripe
 			 */
-			if (key.objectid < logical) {
-				key.offset -= logical - key.objectid;
-				key.objectid = logical;
+			if (extent_logical < logical) {
+				extent_len -= logical - extent_logical;
+				extent_logical = logical;
 			}
-			if (key.objectid + key.offset >
+			if (extent_logical + extent_len >
 			    logical + map->stripe_len) {
-				key.offset = logical + map->stripe_len -
-					     key.objectid;
+				extent_len = logical + map->stripe_len -
+					     extent_logical;
 			}
 
-			extent_logical = key.objectid;
-			extent_physical = key.objectid - logical + physical;
-			extent_len = key.offset;
+			extent_physical = extent_logical - logical + physical;
 			extent_dev = scrub_dev;
 			extent_mirror_num = mirror_num;
 			if (is_dev_replace)
@@ -2481,13 +2491,35 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 						   extent_len, &extent_physical,
 						   &extent_dev,
 						   &extent_mirror_num);
+
+			ret = btrfs_lookup_csums_range(csum_root, logical,
+						logical + map->stripe_len - 1,
+						&sctx->csum_list, 1);
+			if (ret)
+				goto out;
+
 			ret = scrub_extent(sctx, extent_logical, extent_len,
 					   extent_physical, extent_dev, flags,
 					   generation, extent_mirror_num,
-					   key.objectid - logical + physical);
+					   extent_physical);
 			if (ret)
 				goto out;
 
+			if (extent_logical + extent_len <
+			    key.objectid + bytes) {
+				logical += increment;
+				physical += map->stripe_len;
+
+				if (logical < key.objectid + bytes) {
+					cond_resched();
+					goto again;
+				}
+
+				if (logical >= logic_end) {
+					stop_loop = 1;
+					break;
+				}
+			}
 next:
 			path->slots[0]++;
 		}
@@ -2495,8 +2527,14 @@ next:
 		logical += increment;
 		physical += map->stripe_len;
 		spin_lock(&sctx->stat_lock);
-		sctx->stat.last_physical = physical;
+		if (stop_loop)
+			sctx->stat.last_physical = map->stripes[num].physical +
+						   length;
+		else
+			sctx->stat.last_physical = physical;
 		spin_unlock(&sctx->stat_lock);
+		if (stop_loop)
+			break;
 	}
 out:
 	/* push queued extents */
@@ -3005,28 +3043,6 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
-int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
-{
-	struct btrfs_fs_info *fs_info = root->fs_info;
-	struct btrfs_device *dev;
-	int ret;
-
-	/*
-	 * we have to hold the device_list_mutex here so the device
-	 * does not go away in cancel_dev. FIXME: find a better solution
-	 */
-	mutex_lock(&fs_info->fs_devices->device_list_mutex);
-	dev = btrfs_find_device(fs_info, devid, NULL, NULL);
-	if (!dev) {
-		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-		return -ENODEV;
-	}
-	ret = btrfs_scrub_cancel_dev(fs_info, dev);
-	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-
-	return ret;
-}
-
 int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
 			 struct btrfs_scrub_progress *progress)
 {
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index c85e7c6..ff40f1c 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -387,7 +387,7 @@ static struct btrfs_path *alloc_path_for_send(void)
 	return path;
 }
 
-int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
+static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
 {
 	int ret;
 	mm_segment_t old_fs;
@@ -3479,7 +3479,6 @@ static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
 	struct send_ctx *sctx = ctx;
 	char *found_data = NULL;
 	int found_data_len  = 0;
-	struct fs_path *p = NULL;
 
 	ret = find_xattr(sctx, sctx->parent_root, sctx->right_path,
 			sctx->cmp_key, name, name_len, &found_data,
@@ -3498,7 +3497,6 @@ static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
 	}
 
 	kfree(found_data);
-	fs_path_free(sctx, p);
 	return ret;
 }
 
@@ -4529,9 +4527,11 @@ static int send_subvol(struct send_ctx *sctx)
 {
 	int ret;
 
-	ret = send_header(sctx);
-	if (ret < 0)
-		goto out;
+	if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
+		ret = send_header(sctx);
+		if (ret < 0)
+			goto out;
+	}
 
 	ret = send_subvol_begin(sctx);
 	if (ret < 0)
@@ -4593,7 +4593,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 		goto out;
 	}
 
-	if (arg->flags & ~BTRFS_SEND_FLAG_NO_FILE_DATA) {
+	if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -4612,8 +4612,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 	sctx->flags = arg->flags;
 
 	sctx->send_filp = fget(arg->send_fd);
-	if (IS_ERR(sctx->send_filp)) {
-		ret = PTR_ERR(sctx->send_filp);
+	if (!sctx->send_filp) {
+		ret = -EBADF;
 		goto out;
 	}
 
@@ -4704,12 +4704,14 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 	if (ret < 0)
 		goto out;
 
-	ret = begin_cmd(sctx, BTRFS_SEND_C_END);
-	if (ret < 0)
-		goto out;
-	ret = send_cmd(sctx);
-	if (ret < 0)
-		goto out;
+	if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
+		ret = begin_cmd(sctx, BTRFS_SEND_C_END);
+		if (ret < 0)
+			goto out;
+		ret = send_cmd(sctx);
+		if (ret < 0)
+			goto out;
+	}
 
 out:
 	kfree(arg);
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 8bb18f7..48d425a 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -131,5 +131,4 @@ enum {
 
 #ifdef __KERNEL__
 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
-int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off);
 #endif
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f6b8859..a4807ce 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -56,6 +56,7 @@
 #include "compression.h"
 #include "rcu-string.h"
 #include "dev-replace.h"
+#include "free-space-cache.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/btrfs.h>
@@ -63,9 +64,9 @@
 static const struct super_operations btrfs_super_ops;
 static struct file_system_type btrfs_fs_type;
 
-static const char *btrfs_decode_error(int errno, char nbuf[16])
+static const char *btrfs_decode_error(int errno)
 {
-	char *errstr = NULL;
+	char *errstr = "unknown";
 
 	switch (errno) {
 	case -EIO:
@@ -80,18 +81,18 @@ static const char *btrfs_decode_error(int errno, char nbuf[16])
 	case -EEXIST:
 		errstr = "Object already exists";
 		break;
-	default:
-		if (nbuf) {
-			if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
-				errstr = nbuf;
-		}
+	case -ENOSPC:
+		errstr = "No space left";
+		break;
+	case -ENOENT:
+		errstr = "No such entry";
 		break;
 	}
 
 	return errstr;
 }
 
-static void __save_error_info(struct btrfs_fs_info *fs_info)
+static void save_error_info(struct btrfs_fs_info *fs_info)
 {
 	/*
 	 * today we only save the error info into ram.  Long term we'll
@@ -100,11 +101,6 @@ static void __save_error_info(struct btrfs_fs_info *fs_info)
 	set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
 }
 
-static void save_error_info(struct btrfs_fs_info *fs_info)
-{
-	__save_error_info(fs_info);
-}
-
 /* btrfs handle error by forcing the filesystem readonly */
 static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
 {
@@ -115,7 +111,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
 
 	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
 		sb->s_flags |= MS_RDONLY;
-		printk(KERN_INFO "btrfs is forced readonly\n");
+		btrfs_info(fs_info, "forced readonly");
 		/*
 		 * Note that a running device replace operation is not
 		 * canceled here although there is no way to update
@@ -126,7 +122,6 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
 		 * mounted writeable again, the device replace
 		 * operation continues.
 		 */
-//		WARN_ON(1);
 	}
 }
 
@@ -139,7 +134,6 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
 		       unsigned int line, int errno, const char *fmt, ...)
 {
 	struct super_block *sb = fs_info->sb;
-	char nbuf[16];
 	const char *errstr;
 
 	/*
@@ -149,7 +143,7 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
 	if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
   		return;
 
-  	errstr = btrfs_decode_error(errno, nbuf);
+	errstr = btrfs_decode_error(errno);
 	if (fmt) {
 		struct va_format vaf;
 		va_list args;
@@ -158,19 +152,18 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
 		vaf.fmt = fmt;
 		vaf.va = &args;
 
-		printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s (%pV)\n",
-			sb->s_id, function, line, errstr, &vaf);
+		printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: errno=%d %s (%pV)\n",
+			sb->s_id, function, line, errno, errstr, &vaf);
 		va_end(args);
 	} else {
-		printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n",
-			sb->s_id, function, line, errstr);
+		printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: errno=%d %s\n",
+			sb->s_id, function, line, errno, errstr);
 	}
 
 	/* Don't go through full error handling during mount */
-	if (sb->s_flags & MS_BORN) {
-		save_error_info(fs_info);
+	save_error_info(fs_info);
+	if (sb->s_flags & MS_BORN)
 		btrfs_handle_error(fs_info);
-	}
 }
 
 static const char * const logtypes[] = {
@@ -184,7 +177,7 @@ static const char * const logtypes[] = {
 	"debug",
 };
 
-void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...)
+void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
 {
 	struct super_block *sb = fs_info->sb;
 	char lvl[4];
@@ -208,7 +201,7 @@ void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...)
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	printk("%sBTRFS %s (device %s): %pV", lvl, type, sb->s_id, &vaf);
+	printk("%sBTRFS %s (device %s): %pV\n", lvl, type, sb->s_id, &vaf);
 
 	va_end(args);
 }
@@ -252,18 +245,24 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root, const char *function,
 			       unsigned int line, int errno)
 {
-	WARN_ONCE(1, KERN_DEBUG "btrfs: Transaction aborted\n");
+	/*
+	 * Report first abort since mount
+	 */
+	if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,
+				&root->fs_info->fs_state)) {
+		WARN(1, KERN_DEBUG "btrfs: Transaction aborted (error %d)\n",
+				errno);
+	}
 	trans->aborted = errno;
 	/* Nothing used. The other threads that have joined this
 	 * transaction may be able to continue. */
 	if (!trans->blocks_used) {
-		char nbuf[16];
 		const char *errstr;
 
-		errstr = btrfs_decode_error(errno, nbuf);
-		btrfs_printk(root->fs_info,
-			     "%s:%d: Aborting unused transaction(%s).\n",
-			     function, line, errstr);
+		errstr = btrfs_decode_error(errno);
+		btrfs_warn(root->fs_info,
+		           "%s:%d: Aborting unused transaction(%s).",
+		           function, line, errstr);
 		return;
 	}
 	ACCESS_ONCE(trans->transaction->aborted) = errno;
@@ -276,7 +275,6 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
 void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
 		   unsigned int line, int errno, const char *fmt, ...)
 {
-	char nbuf[16];
 	char *s_id = "<unknown>";
 	const char *errstr;
 	struct va_format vaf = { .fmt = fmt };
@@ -288,13 +286,13 @@ void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
 	va_start(args, fmt);
 	vaf.va = &args;
 
-	errstr = btrfs_decode_error(errno, nbuf);
+	errstr = btrfs_decode_error(errno);
 	if (fs_info && (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR))
-		panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n",
-			s_id, function, line, &vaf, errstr);
+		panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
+			s_id, function, line, &vaf, errno, errstr);
 
-	printk(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n",
-	       s_id, function, line, &vaf, errstr);
+	printk(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
+	       s_id, function, line, &vaf, errno, errstr);
 	va_end(args);
 	/* Caller calls BUG() */
 }
@@ -650,7 +648,7 @@ out:
  */
 static int btrfs_parse_early_options(const char *options, fmode_t flags,
 		void *holder, char **subvol_name, u64 *subvol_objectid,
-		u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices)
+		struct btrfs_fs_devices **fs_devices)
 {
 	substring_t args[MAX_OPT_ARGS];
 	char *device_name, *opts, *orig, *p;
@@ -693,16 +691,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
 			}
 			break;
 		case Opt_subvolrootid:
-			intarg = 0;
-			error = match_int(&args[0], &intarg);
-			if (!error) {
-				/* we want the original fs_tree */
-				if (!intarg)
-					*subvol_rootid =
-						BTRFS_FS_TREE_OBJECTID;
-				else
-					*subvol_rootid = intarg;
-			}
+			printk(KERN_WARNING
+				"btrfs: 'subvolrootid' mount option is deprecated and has no effect\n");
 			break;
 		case Opt_device:
 			device_name = match_strdup(&args[0]);
@@ -876,7 +866,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
 		return 0;
 	}
 
-	btrfs_wait_ordered_extents(root, 0);
+	btrfs_wait_ordered_extents(root, 1);
 
 	trans = btrfs_attach_transaction_barrier(root);
 	if (IS_ERR(trans)) {
@@ -1080,7 +1070,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 	fmode_t mode = FMODE_READ;
 	char *subvol_name = NULL;
 	u64 subvol_objectid = 0;
-	u64 subvol_rootid = 0;
 	int error = 0;
 
 	if (!(flags & MS_RDONLY))
@@ -1088,7 +1077,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 
 	error = btrfs_parse_early_options(data, mode, fs_type,
 					  &subvol_name, &subvol_objectid,
-					  &subvol_rootid, &fs_devices);
+					  &fs_devices);
 	if (error) {
 		kfree(subvol_name);
 		return ERR_PTR(error);
@@ -1202,11 +1191,14 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
 			      new_pool_size);
 }
 
-static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info,
-					 unsigned long old_opts, int flags)
+static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info)
 {
 	set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
+}
 
+static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
+				       unsigned long old_opts, int flags)
+{
 	if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
 	    (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
 	     (flags & MS_RDONLY))) {
@@ -1247,7 +1239,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 	unsigned int old_metadata_ratio = fs_info->metadata_ratio;
 	int ret;
 
-	btrfs_remount_prepare(fs_info, old_opts, *flags);
+	btrfs_remount_prepare(fs_info);
 
 	ret = btrfs_parse_options(root, data);
 	if (ret) {
@@ -1255,6 +1247,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 		goto restore;
 	}
 
+	btrfs_remount_begin(fs_info, old_opts, *flags);
 	btrfs_resize_thread_pool(fs_info,
 		fs_info->thread_pool_size, old_thread_pool_size);
 
@@ -1739,6 +1732,10 @@ static int __init init_btrfs_fs(void)
 
 	btrfs_init_lockdep();
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+	btrfs_test_free_space_cache();
+#endif
+
 	printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION);
 	return 0;
 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 50767bb..0544587 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -34,7 +34,7 @@
 
 #define BTRFS_ROOT_TRANS_TAG 0
 
-void put_transaction(struct btrfs_transaction *transaction)
+static void put_transaction(struct btrfs_transaction *transaction)
 {
 	WARN_ON(atomic_read(&transaction->use_count) == 0);
 	if (atomic_dec_and_test(&transaction->use_count)) {
@@ -162,7 +162,7 @@ loop:
 	if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log))
 		WARN(1, KERN_ERR "btrfs: tree_mod_log rb tree not empty when "
 			"creating a fresh transaction\n");
-	atomic_set(&fs_info->tree_mod_seq, 0);
+	atomic64_set(&fs_info->tree_mod_seq, 0);
 
 	spin_lock_init(&cur_trans->commit_lock);
 	spin_lock_init(&cur_trans->delayed_refs.lock);
@@ -707,23 +707,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root)
 {
-	int ret;
-
-	ret = __btrfs_end_transaction(trans, root, 0);
-	if (ret)
-		return ret;
-	return 0;
+	return __btrfs_end_transaction(trans, root, 0);
 }
 
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root)
 {
-	int ret;
-
-	ret = __btrfs_end_transaction(trans, root, 1);
-	if (ret)
-		return ret;
-	return 0;
+	return __btrfs_end_transaction(trans, root, 1);
 }
 
 int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
@@ -948,7 +938,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
 int btrfs_add_dead_root(struct btrfs_root *root)
 {
 	spin_lock(&root->fs_info->trans_lock);
-	list_add(&root->root_list, &root->fs_info->dead_roots);
+	list_add_tail(&root->root_list, &root->fs_info->dead_roots);
 	spin_unlock(&root->fs_info->trans_lock);
 	return 0;
 }
@@ -1179,13 +1169,17 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
 	memcpy(new_root_item->parent_uuid, root->root_item.uuid,
 			BTRFS_UUID_SIZE);
+	if (!(root_flags & BTRFS_ROOT_SUBVOL_RDONLY)) {
+		memset(new_root_item->received_uuid, 0,
+		       sizeof(new_root_item->received_uuid));
+		memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
+		memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
+		btrfs_set_root_stransid(new_root_item, 0);
+		btrfs_set_root_rtransid(new_root_item, 0);
+	}
 	new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
 	new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec);
 	btrfs_set_root_otransid(new_root_item, trans->transid);
-	memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
-	memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
-	btrfs_set_root_stransid(new_root_item, 0);
-	btrfs_set_root_rtransid(new_root_item, 0);
 
 	old = btrfs_lock_root_node(root);
 	ret = btrfs_cow_block(trans, root, old, NULL, 0, &old);
@@ -1487,6 +1481,10 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
 		current->journal_info = NULL;
 
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
+
+	spin_lock(&root->fs_info->trans_lock);
+	root->fs_info->trans_no_join = 0;
+	spin_unlock(&root->fs_info->trans_lock);
 }
 
 static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
@@ -1808,7 +1806,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	ret = btrfs_write_and_wait_transaction(trans, root);
 	if (ret) {
 		btrfs_error(root->fs_info, ret,
-			    "Error while writing out transaction.");
+			    "Error while writing out transaction");
 		mutex_unlock(&root->fs_info->tree_log_mutex);
 		goto cleanup_transaction;
 	}
@@ -1864,8 +1862,7 @@ cleanup_transaction:
 		btrfs_qgroup_free(root, trans->qgroup_reserved);
 		trans->qgroup_reserved = 0;
 	}
-	btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n");
-//	WARN_ON(1);
+	btrfs_warn(root->fs_info, "Skipping commit of aborted transaction.");
 	if (current->journal_info == trans)
 		current->journal_info = NULL;
 	cleanup_transaction(trans, root, ret);
@@ -1874,31 +1871,49 @@ cleanup_transaction:
 }
 
 /*
- * interface function to delete all the snapshots we have scheduled for deletion
+ * return < 0 if error
+ * 0 if there are no more dead_roots at the time of call
+ * 1 there are more to be processed, call me again
+ *
+ * The return value indicates there are certainly more snapshots to delete, but
+ * if there comes a new one during processing, it may return 0. We don't mind,
+ * because btrfs_commit_super will poke cleaner thread and it will process it a
+ * few seconds later.
  */
-int btrfs_clean_old_snapshots(struct btrfs_root *root)
+int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
 {
-	LIST_HEAD(list);
+	int ret;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 
+	if (fs_info->sb->s_flags & MS_RDONLY) {
+		pr_debug("btrfs: cleaner called for RO fs!\n");
+		return 0;
+	}
+
 	spin_lock(&fs_info->trans_lock);
-	list_splice_init(&fs_info->dead_roots, &list);
+	if (list_empty(&fs_info->dead_roots)) {
+		spin_unlock(&fs_info->trans_lock);
+		return 0;
+	}
+	root = list_first_entry(&fs_info->dead_roots,
+			struct btrfs_root, root_list);
+	list_del(&root->root_list);
 	spin_unlock(&fs_info->trans_lock);
 
-	while (!list_empty(&list)) {
-		int ret;
-
-		root = list_entry(list.next, struct btrfs_root, root_list);
-		list_del(&root->root_list);
+	pr_debug("btrfs: cleaner removing %llu\n",
+			(unsigned long long)root->objectid);
 
-		btrfs_kill_all_delayed_nodes(root);
+	btrfs_kill_all_delayed_nodes(root);
 
-		if (btrfs_header_backref_rev(root->node) <
-		    BTRFS_MIXED_BACKREF_REV)
-			ret = btrfs_drop_snapshot(root, NULL, 0, 0);
-		else
-			ret =btrfs_drop_snapshot(root, NULL, 1, 0);
-		BUG_ON(ret < 0);
-	}
-	return 0;
+	if (btrfs_header_backref_rev(root->node) <
+			BTRFS_MIXED_BACKREF_REV)
+		ret = btrfs_drop_snapshot(root, NULL, 0, 0);
+	else
+		ret = btrfs_drop_snapshot(root, NULL, 1, 0);
+	/*
+	 * If we encounter a transaction abort during snapshot cleaning, we
+	 * don't want to crash here
+	 */
+	BUG_ON(ret < 0 && ret != -EAGAIN && ret != -EROFS);
+	return 1;
 }
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 3c8e0d2..24c9733 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -123,7 +123,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
 
 int btrfs_add_dead_root(struct btrfs_root *root);
 int btrfs_defrag_root(struct btrfs_root *root);
-int btrfs_clean_old_snapshots(struct btrfs_root *root);
+int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root);
 int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
@@ -146,5 +146,4 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
 				struct extent_io_tree *dirty_pages, int mark);
 int btrfs_transaction_blocked(struct btrfs_fs_info *info);
 int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
-void put_transaction(struct btrfs_transaction *transaction);
 #endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ef96381..c276ac9 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -277,17 +277,19 @@ static int process_one_buffer(struct btrfs_root *log,
 			      struct extent_buffer *eb,
 			      struct walk_control *wc, u64 gen)
 {
+	int ret = 0;
+
 	if (wc->pin)
-		btrfs_pin_extent_for_log_replay(log->fs_info->extent_root,
-						eb->start, eb->len);
+		ret = btrfs_pin_extent_for_log_replay(log->fs_info->extent_root,
+						      eb->start, eb->len);
 
-	if (btrfs_buffer_uptodate(eb, gen, 0)) {
+	if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) {
 		if (wc->write)
 			btrfs_write_tree_block(eb);
 		if (wc->wait)
 			btrfs_wait_tree_block_writeback(eb);
 	}
-	return 0;
+	return ret;
 }
 
 /*
@@ -408,9 +410,9 @@ insert:
 		found_size = btrfs_item_size_nr(path->nodes[0],
 						path->slots[0]);
 		if (found_size > item_size)
-			btrfs_truncate_item(trans, root, path, item_size, 1);
+			btrfs_truncate_item(root, path, item_size, 1);
 		else if (found_size < item_size)
-			btrfs_extend_item(trans, root, path,
+			btrfs_extend_item(root, path,
 					  item_size - found_size);
 	} else if (ret) {
 		return ret;
@@ -587,7 +589,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 
 	/* drop any overlapping extents */
 	ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1);
-	BUG_ON(ret);
+	if (ret)
+		goto out;
 
 	if (found_type == BTRFS_FILE_EXTENT_REG ||
 	    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
@@ -597,7 +600,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 
 		ret = btrfs_insert_empty_item(trans, root, path, key,
 					      sizeof(*item));
-		BUG_ON(ret);
+		if (ret)
+			goto out;
 		dest_offset = btrfs_item_ptr_offset(path->nodes[0],
 						    path->slots[0]);
 		copy_extent_buffer(path->nodes[0], eb, dest_offset,
@@ -623,7 +627,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 						ins.objectid, ins.offset,
 						0, root->root_key.objectid,
 						key->objectid, offset, 0);
-				BUG_ON(ret);
+				if (ret)
+					goto out;
 			} else {
 				/*
 				 * insert the extent pointer in the extent
@@ -632,7 +637,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 				ret = btrfs_alloc_logged_file_extent(trans,
 						root, root->root_key.objectid,
 						key->objectid, offset, &ins);
-				BUG_ON(ret);
+				if (ret)
+					goto out;
 			}
 			btrfs_release_path(path);
 
@@ -649,26 +655,30 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 			ret = btrfs_lookup_csums_range(root->log_root,
 						csum_start, csum_end - 1,
 						&ordered_sums, 0);
-			BUG_ON(ret);
+			if (ret)
+				goto out;
 			while (!list_empty(&ordered_sums)) {
 				struct btrfs_ordered_sum *sums;
 				sums = list_entry(ordered_sums.next,
 						struct btrfs_ordered_sum,
 						list);
-				ret = btrfs_csum_file_blocks(trans,
+				if (!ret)
+					ret = btrfs_csum_file_blocks(trans,
 						root->fs_info->csum_root,
 						sums);
-				BUG_ON(ret);
 				list_del(&sums->list);
 				kfree(sums);
 			}
+			if (ret)
+				goto out;
 		} else {
 			btrfs_release_path(path);
 		}
 	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
 		/* inline extents are easy, we just overwrite them */
 		ret = overwrite_item(trans, root, path, eb, slot, key);
-		BUG_ON(ret);
+		if (ret)
+			goto out;
 	}
 
 	inode_add_bytes(inode, nbytes);
@@ -713,20 +723,21 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
 
 	inode = read_one_inode(root, location.objectid);
 	if (!inode) {
-		kfree(name);
-		return -EIO;
+		ret = -EIO;
+		goto out;
 	}
 
 	ret = link_to_fixup_dir(trans, root, path, location.objectid);
-	BUG_ON(ret);
+	if (ret)
+		goto out;
 
 	ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
-	BUG_ON(ret);
+	if (ret)
+		goto out;
+	btrfs_run_delayed_items(trans, root);
+out:
 	kfree(name);
-
 	iput(inode);
-
-	btrfs_run_delayed_items(trans, root);
 	return ret;
 }
 
@@ -879,7 +890,8 @@ again:
 			victim_name_len = btrfs_inode_ref_name_len(leaf,
 								   victim_ref);
 			victim_name = kmalloc(victim_name_len, GFP_NOFS);
-			BUG_ON(!victim_name);
+			if (!victim_name)
+				return -ENOMEM;
 
 			read_extent_buffer(leaf, victim_name,
 					   (unsigned long)(victim_ref + 1),
@@ -895,9 +907,10 @@ again:
 				ret = btrfs_unlink_inode(trans, root, dir,
 							 inode, victim_name,
 							 victim_name_len);
-				BUG_ON(ret);
-				btrfs_run_delayed_items(trans, root);
 				kfree(victim_name);
+				if (ret)
+					return ret;
+				btrfs_run_delayed_items(trans, root);
 				*search_done = 1;
 				goto again;
 			}
@@ -905,7 +918,6 @@ again:
 
 			ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
 		}
-		BUG_ON(ret);
 
 		/*
 		 * NOTE: we have searched root tree and checked the
@@ -939,6 +951,8 @@ again:
 				goto next;
 
 			victim_name = kmalloc(victim_name_len, GFP_NOFS);
+			if (!victim_name)
+				return -ENOMEM;
 			read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name,
 					   victim_name_len);
 
@@ -965,14 +979,16 @@ again:
 								 victim_name_len);
 					btrfs_run_delayed_items(trans, root);
 				}
-				BUG_ON(ret);
 				iput(victim_parent);
 				kfree(victim_name);
+				if (ret)
+					return ret;
 				*search_done = 1;
 				goto again;
 			}
 			kfree(victim_name);
-			BUG_ON(ret);
+			if (ret)
+				return ret;
 next:
 			cur_offset += victim_name_len + sizeof(*extref);
 		}
@@ -985,7 +1001,8 @@ next:
 					 ref_index, name, namelen, 0);
 	if (di && !IS_ERR(di)) {
 		ret = drop_one_dir_item(trans, root, path, dir, di);
-		BUG_ON(ret);
+		if (ret)
+			return ret;
 	}
 	btrfs_release_path(path);
 
@@ -994,7 +1011,8 @@ next:
 				   name, namelen, 0);
 	if (di && !IS_ERR(di)) {
 		ret = drop_one_dir_item(trans, root, path, dir, di);
-		BUG_ON(ret);
+		if (ret)
+			return ret;
 	}
 	btrfs_release_path(path);
 
@@ -1139,15 +1157,19 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
 						      parent_objectid,
 						      ref_index, name, namelen,
 						      &search_done);
-				if (ret == 1)
+				if (ret == 1) {
+					ret = 0;
+					goto out;
+				}
+				if (ret)
 					goto out;
-				BUG_ON(ret);
 			}
 
 			/* insert our name */
 			ret = btrfs_add_link(trans, dir, inode, name, namelen,
 					     0, ref_index);
-			BUG_ON(ret);
+			if (ret)
+				goto out;
 
 			btrfs_update_inode(trans, root, inode);
 		}
@@ -1162,13 +1184,11 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
 
 	/* finally write the back reference in the inode */
 	ret = overwrite_item(trans, root, path, eb, slot, key);
-	BUG_ON(ret);
-
 out:
 	btrfs_release_path(path);
 	iput(dir);
 	iput(inode);
-	return 0;
+	return ret;
 }
 
 static int insert_orphan_item(struct btrfs_trans_handle *trans,
@@ -1326,10 +1346,10 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
 		if (S_ISDIR(inode->i_mode)) {
 			ret = replay_dir_deletes(trans, root, NULL, path,
 						 ino, 1);
-			BUG_ON(ret);
+			if (ret)
+				goto out;
 		}
 		ret = insert_orphan_item(trans, root, ino);
-		BUG_ON(ret);
 	}
 
 out:
@@ -1374,9 +1394,9 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
 			return -EIO;
 
 		ret = fixup_inode_link_count(trans, root, inode);
-		BUG_ON(ret);
-
 		iput(inode);
+		if (ret)
+			goto out;
 
 		/*
 		 * fixup on a directory may create new entries,
@@ -1426,7 +1446,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
 	} else if (ret == -EEXIST) {
 		ret = 0;
 	} else {
-		BUG();
+		BUG(); /* Logic Error */
 	}
 	iput(inode);
 
@@ -1495,7 +1515,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
 	struct inode *dir;
 	u8 log_type;
 	int exists;
-	int ret;
+	int ret = 0;
 
 	dir = read_one_inode(root, key->objectid);
 	if (!dir)
@@ -1527,7 +1547,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
 						     key->offset, name,
 						     name_len, 1);
 	} else {
-		BUG();
+		/* Corruption */
+		ret = -EINVAL;
+		goto out;
 	}
 	if (IS_ERR_OR_NULL(dst_di)) {
 		/* we need a sequence number to insert, so we only
@@ -1555,7 +1577,8 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
 		goto out;
 
 	ret = drop_one_dir_item(trans, root, path, dir, dst_di);
-	BUG_ON(ret);
+	if (ret)
+		goto out;
 
 	if (key->type == BTRFS_DIR_INDEX_KEY)
 		goto insert;
@@ -1563,14 +1586,15 @@ out:
 	btrfs_release_path(path);
 	kfree(name);
 	iput(dir);
-	return 0;
+	return ret;
 
 insert:
 	btrfs_release_path(path);
 	ret = insert_one_name(trans, root, path, key->objectid, key->offset,
 			      name, name_len, log_type, &log_key);
-
-	BUG_ON(ret && ret != -ENOENT);
+	if (ret && ret != -ENOENT)
+		goto out;
+	ret = 0;
 	goto out;
 }
 
@@ -1601,7 +1625,8 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
 			return -EIO;
 		name_len = btrfs_dir_name_len(eb, di);
 		ret = replay_one_name(trans, root, path, eb, di, key);
-		BUG_ON(ret);
+		if (ret)
+			return ret;
 		ptr = (unsigned long)(di + 1);
 		ptr += name_len;
 	}
@@ -1762,16 +1787,21 @@ again:
 
 			ret = link_to_fixup_dir(trans, root,
 						path, location.objectid);
-			BUG_ON(ret);
+			if (ret) {
+				kfree(name);
+				iput(inode);
+				goto out;
+			}
+
 			btrfs_inc_nlink(inode);
 			ret = btrfs_unlink_inode(trans, root, dir, inode,
 						 name, name_len);
-			BUG_ON(ret);
-
-			btrfs_run_delayed_items(trans, root);
-
+			if (!ret)
+				btrfs_run_delayed_items(trans, root);
 			kfree(name);
 			iput(inode);
+			if (ret)
+				goto out;
 
 			/* there might still be more names under this key
 			 * check and repeat if required
@@ -1875,7 +1905,8 @@ again:
 			ret = check_item_in_log(trans, root, log, path,
 						log_path, dir,
 						&found_key);
-			BUG_ON(ret);
+			if (ret)
+				goto out;
 			if (found_key.offset == (u64)-1)
 				break;
 			dir_key.offset = found_key.offset + 1;
@@ -1952,11 +1983,13 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
 			if (S_ISDIR(mode)) {
 				ret = replay_dir_deletes(wc->trans,
 					 root, log, path, key.objectid, 0);
-				BUG_ON(ret);
+				if (ret)
+					break;
 			}
 			ret = overwrite_item(wc->trans, root, path,
 					     eb, i, &key);
-			BUG_ON(ret);
+			if (ret)
+				break;
 
 			/* for regular files, make sure corresponding
 			 * orhpan item exist. extents past the new EOF
@@ -1965,12 +1998,14 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
 			if (S_ISREG(mode)) {
 				ret = insert_orphan_item(wc->trans, root,
 							 key.objectid);
-				BUG_ON(ret);
+				if (ret)
+					break;
 			}
 
 			ret = link_to_fixup_dir(wc->trans, root,
 						path, key.objectid);
-			BUG_ON(ret);
+			if (ret)
+				break;
 		}
 		if (wc->stage < LOG_WALK_REPLAY_ALL)
 			continue;
@@ -1979,28 +2014,35 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
 		if (key.type == BTRFS_XATTR_ITEM_KEY) {
 			ret = overwrite_item(wc->trans, root, path,
 					     eb, i, &key);
-			BUG_ON(ret);
+			if (ret)
+				break;
 		} else if (key.type == BTRFS_INODE_REF_KEY) {
 			ret = add_inode_ref(wc->trans, root, log, path,
 					    eb, i, &key);
-			BUG_ON(ret && ret != -ENOENT);
+			if (ret && ret != -ENOENT)
+				break;
+			ret = 0;
 		} else if (key.type == BTRFS_INODE_EXTREF_KEY) {
 			ret = add_inode_ref(wc->trans, root, log, path,
 					    eb, i, &key);
-			BUG_ON(ret && ret != -ENOENT);
+			if (ret && ret != -ENOENT)
+				break;
+			ret = 0;
 		} else if (key.type == BTRFS_EXTENT_DATA_KEY) {
 			ret = replay_one_extent(wc->trans, root, path,
 						eb, i, &key);
-			BUG_ON(ret);
+			if (ret)
+				break;
 		} else if (key.type == BTRFS_DIR_ITEM_KEY ||
 			   key.type == BTRFS_DIR_INDEX_KEY) {
 			ret = replay_one_dir_item(wc->trans, root, path,
 						  eb, i, &key);
-			BUG_ON(ret);
+			if (ret)
+				break;
 		}
 	}
 	btrfs_free_path(path);
-	return 0;
+	return ret;
 }
 
 static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
@@ -2045,8 +2087,10 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 
 		if (*level == 1) {
 			ret = wc->process_func(root, next, wc, ptr_gen);
-			if (ret)
+			if (ret) {
+				free_extent_buffer(next);
 				return ret;
+			}
 
 			path->slots[*level]++;
 			if (wc->free) {
@@ -2066,7 +2110,10 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 					BTRFS_TREE_LOG_OBJECTID);
 				ret = btrfs_free_and_pin_reserved_extent(root,
 							 bytenr, blocksize);
-				BUG_ON(ret); /* -ENOMEM or logic errors */
+				if (ret) {
+					free_extent_buffer(next);
+					return ret;
+				}
 			}
 			free_extent_buffer(next);
 			continue;
@@ -2139,7 +2186,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
 				ret = btrfs_free_and_pin_reserved_extent(root,
 						path->nodes[*level]->start,
 						path->nodes[*level]->len);
-				BUG_ON(ret);
+				if (ret)
+					return ret;
 			}
 			free_extent_buffer(path->nodes[*level]);
 			path->nodes[*level] = NULL;
@@ -2161,7 +2209,6 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
 	int wret;
 	int level;
 	struct btrfs_path *path;
-	int i;
 	int orig_level;
 
 	path = btrfs_alloc_path();
@@ -2213,17 +2260,12 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
 				BTRFS_TREE_LOG_OBJECTID);
 			ret = btrfs_free_and_pin_reserved_extent(log, next->start,
 							 next->len);
-			BUG_ON(ret); /* -ENOMEM or logic errors */
+			if (ret)
+				goto out;
 		}
 	}
 
 out:
-	for (i = 0; i <= orig_level; i++) {
-		if (path->nodes[i]) {
-			free_extent_buffer(path->nodes[i]);
-			path->nodes[i] = NULL;
-		}
-	}
 	btrfs_free_path(path);
 	return ret;
 }
@@ -2507,7 +2549,10 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
 
 	if (trans) {
 		ret = walk_log_tree(trans, log, &wc);
-		BUG_ON(ret);
+
+		/* I don't think this can happen but just in case */
+		if (ret)
+			btrfs_abort_transaction(trans, log, ret);
 	}
 
 	while (1) {
@@ -2615,7 +2660,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
 	if (di) {
 		ret = btrfs_delete_one_dir_name(trans, log, path, di);
 		bytes_del += name_len;
-		BUG_ON(ret);
+		if (ret) {
+			err = ret;
+			goto fail;
+		}
 	}
 	btrfs_release_path(path);
 	di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
@@ -2627,7 +2675,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
 	if (di) {
 		ret = btrfs_delete_one_dir_name(trans, log, path, di);
 		bytes_del += name_len;
-		BUG_ON(ret);
+		if (ret) {
+			err = ret;
+			goto fail;
+		}
 	}
 
 	/* update the directory size in the log to reflect the names
@@ -2966,7 +3017,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
 
 	while (1) {
 		ret = btrfs_search_slot(trans, log, &key, path, -1, 1);
-		BUG_ON(ret == 0);
+		BUG_ON(ret == 0); /* Logic error */
 		if (ret < 0)
 			break;
 
@@ -3169,7 +3220,11 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 						log->fs_info->csum_root,
 						ds + cs, ds + cs + cl - 1,
 						&ordered_sums, 0);
-				BUG_ON(ret);
+				if (ret) {
+					btrfs_release_path(dst_path);
+					kfree(ins_data);
+					return ret;
+				}
 			}
 		}
 	}
@@ -3209,115 +3264,6 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)
 	return 0;
 }
 
-static int drop_adjacent_extents(struct btrfs_trans_handle *trans,
-				 struct btrfs_root *root, struct inode *inode,
-				 struct extent_map *em,
-				 struct btrfs_path *path)
-{
-	struct btrfs_file_extent_item *fi;
-	struct extent_buffer *leaf;
-	struct btrfs_key key, new_key;
-	struct btrfs_map_token token;
-	u64 extent_end;
-	u64 extent_offset = 0;
-	int extent_type;
-	int del_slot = 0;
-	int del_nr = 0;
-	int ret = 0;
-
-	while (1) {
-		btrfs_init_map_token(&token);
-		leaf = path->nodes[0];
-		path->slots[0]++;
-		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
-			if (del_nr) {
-				ret = btrfs_del_items(trans, root, path,
-						      del_slot, del_nr);
-				if (ret)
-					return ret;
-				del_nr = 0;
-			}
-
-			ret = btrfs_next_leaf_write(trans, root, path, 1);
-			if (ret < 0)
-				return ret;
-			if (ret > 0)
-				return 0;
-			leaf = path->nodes[0];
-		}
-
-		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-		if (key.objectid != btrfs_ino(inode) ||
-		    key.type != BTRFS_EXTENT_DATA_KEY ||
-		    key.offset >= em->start + em->len)
-			break;
-
-		fi = btrfs_item_ptr(leaf, path->slots[0],
-				    struct btrfs_file_extent_item);
-		extent_type = btrfs_token_file_extent_type(leaf, fi, &token);
-		if (extent_type == BTRFS_FILE_EXTENT_REG ||
-		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
-			extent_offset = btrfs_token_file_extent_offset(leaf,
-								fi, &token);
-			extent_end = key.offset +
-				btrfs_token_file_extent_num_bytes(leaf, fi,
-								  &token);
-		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
-			extent_end = key.offset +
-				btrfs_file_extent_inline_len(leaf, fi);
-		} else {
-			BUG();
-		}
-
-		if (extent_end <= em->len + em->start) {
-			if (!del_nr) {
-				del_slot = path->slots[0];
-			}
-			del_nr++;
-			continue;
-		}
-
-		/*
-		 * Ok so we'll ignore previous items if we log a new extent,
-		 * which can lead to overlapping extents, so if we have an
-		 * existing extent we want to adjust we _have_ to check the next
-		 * guy to make sure we even need this extent anymore, this keeps
-		 * us from panicing in set_item_key_safe.
-		 */
-		if (path->slots[0] < btrfs_header_nritems(leaf) - 1) {
-			struct btrfs_key tmp_key;
-
-			btrfs_item_key_to_cpu(leaf, &tmp_key,
-					      path->slots[0] + 1);
-			if (tmp_key.objectid == btrfs_ino(inode) &&
-			    tmp_key.type == BTRFS_EXTENT_DATA_KEY &&
-			    tmp_key.offset <= em->start + em->len) {
-				if (!del_nr)
-					del_slot = path->slots[0];
-				del_nr++;
-				continue;
-			}
-		}
-
-		BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
-		memcpy(&new_key, &key, sizeof(new_key));
-		new_key.offset = em->start + em->len;
-		btrfs_set_item_key_safe(trans, root, path, &new_key);
-		extent_offset += em->start + em->len - key.offset;
-		btrfs_set_token_file_extent_offset(leaf, fi, extent_offset,
-						   &token);
-		btrfs_set_token_file_extent_num_bytes(leaf, fi, extent_end -
-						      (em->start + em->len),
-						      &token);
-		btrfs_mark_buffer_dirty(leaf);
-	}
-
-	if (del_nr)
-		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
-
-	return ret;
-}
-
 static int log_one_extent(struct btrfs_trans_handle *trans,
 			  struct inode *inode, struct btrfs_root *root,
 			  struct extent_map *em, struct btrfs_path *path)
@@ -3339,39 +3285,24 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
 	int index = log->log_transid % 2;
 	bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
-insert:
+	ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
+				   em->start + em->len, NULL, 0);
+	if (ret)
+		return ret;
+
 	INIT_LIST_HEAD(&ordered_sums);
 	btrfs_init_map_token(&token);
 	key.objectid = btrfs_ino(inode);
 	key.type = BTRFS_EXTENT_DATA_KEY;
 	key.offset = em->start;
-	path->really_keep_locks = 1;
 
 	ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*fi));
-	if (ret && ret != -EEXIST) {
-		path->really_keep_locks = 0;
+	if (ret)
 		return ret;
-	}
 	leaf = path->nodes[0];
 	fi = btrfs_item_ptr(leaf, path->slots[0],
 			    struct btrfs_file_extent_item);
 
-	/*
-	 * If we are overwriting an inline extent with a real one then we need
-	 * to just delete the inline extent as it may not be large enough to
-	 * have the entire file_extent_item.
-	 */
-	if (ret && btrfs_token_file_extent_type(leaf, fi, &token) ==
-	    BTRFS_FILE_EXTENT_INLINE) {
-		ret = btrfs_del_item(trans, log, path);
-		btrfs_release_path(path);
-		if (ret) {
-			path->really_keep_locks = 0;
-			return ret;
-		}
-		goto insert;
-	}
-
 	btrfs_set_token_file_extent_generation(leaf, fi, em->generation,
 					       &token);
 	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
@@ -3410,22 +3341,14 @@ insert:
 					   em->start - em->orig_start,
 					   &token);
 	btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token);
-	btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->len, &token);
+	btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token);
 	btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type,
 						&token);
 	btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token);
 	btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token);
 	btrfs_mark_buffer_dirty(leaf);
 
-	/*
-	 * Have to check the extent to the right of us to make sure it doesn't
-	 * fall in our current range.  We're ok if the previous extent is in our
-	 * range since the recovery stuff will run us in key order and thus just
-	 * drop the part we overwrote.
-	 */
-	ret = drop_adjacent_extents(trans, log, inode, em, path);
 	btrfs_release_path(path);
-	path->really_keep_locks = 0;
 	if (ret) {
 		return ret;
 	}
@@ -3650,8 +3573,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	bool fast_search = false;
 	u64 ino = btrfs_ino(inode);
 
-	log = root->log_root;
-
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -3918,9 +3839,9 @@ out:
  * only logging is done of any parent directories that are older than
  * the last committed transaction
  */
-int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
-		    struct btrfs_root *root, struct inode *inode,
-		    struct dentry *parent, int exists_only)
+static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
+			    	  struct btrfs_root *root, struct inode *inode,
+			    	  struct dentry *parent, int exists_only)
 {
 	int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
 	struct super_block *sb;
@@ -4111,6 +4032,9 @@ again:
 		wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
 		if (IS_ERR(wc.replay_dest)) {
 			ret = PTR_ERR(wc.replay_dest);
+			free_extent_buffer(log->node);
+			free_extent_buffer(log->commit_root);
+			kfree(log);
 			btrfs_error(fs_info, ret, "Couldn't read target root "
 				    "for tree log recovery.");
 			goto error;
@@ -4119,12 +4043,10 @@ again:
 		wc.replay_dest->log_root = log;
 		btrfs_record_root_in_trans(trans, wc.replay_dest);
 		ret = walk_log_tree(trans, log, &wc);
-		BUG_ON(ret);
 
-		if (wc.stage == LOG_WALK_REPLAY_ALL) {
+		if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
 			ret = fixup_inode_link_counts(trans, wc.replay_dest,
 						      path);
-			BUG_ON(ret);
 		}
 
 		key.offset = found_key.offset - 1;
@@ -4133,6 +4055,9 @@ again:
 		free_extent_buffer(log->commit_root);
 		kfree(log);
 
+		if (ret)
+			goto error;
+
 		if (found_key.offset == 0)
 			break;
 	}
@@ -4153,17 +4078,20 @@ again:
 
 	btrfs_free_path(path);
 
+	/* step 4: commit the transaction, which also unpins the blocks */
+	ret = btrfs_commit_transaction(trans, fs_info->tree_root);
+	if (ret)
+		return ret;
+
 	free_extent_buffer(log_root_tree->node);
 	log_root_tree->log_root = NULL;
 	fs_info->log_root_recovering = 0;
-
-	/* step 4: commit the transaction, which also unpins the blocks */
-	btrfs_commit_transaction(trans, fs_info->tree_root);
-
 	kfree(log_root_tree);
-	return 0;
 
+	return 0;
 error:
+	if (wc.trans)
+		btrfs_end_transaction(wc.trans, fs_info->tree_root);
 	btrfs_free_path(path);
 	return ret;
 }
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 862ac81..1d4ae0d 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -40,9 +40,6 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
 			       struct inode *inode, u64 dirid);
 void btrfs_end_log_trans(struct btrfs_root *root);
 int btrfs_pin_log_trans(struct btrfs_root *root);
-int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
-		    struct btrfs_root *root, struct inode *inode,
-		    struct dentry *parent, int exists_only);
 void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
 			     struct inode *dir, struct inode *inode,
 			     int for_rename);
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c
index ddc61ca..7b417e2 100644
--- a/fs/btrfs/ulist.c
+++ b/fs/btrfs/ulist.c
@@ -53,6 +53,7 @@ void ulist_init(struct ulist *ulist)
 	ulist->nnodes = 0;
 	ulist->nodes = ulist->int_nodes;
 	ulist->nodes_alloced = ULIST_SIZE;
+	ulist->root = RB_ROOT;
 }
 EXPORT_SYMBOL(ulist_init);
 
@@ -72,6 +73,7 @@ void ulist_fini(struct ulist *ulist)
 	if (ulist->nodes_alloced > ULIST_SIZE)
 		kfree(ulist->nodes);
 	ulist->nodes_alloced = 0;	/* in case ulist_fini is called twice */
+	ulist->root = RB_ROOT;
 }
 EXPORT_SYMBOL(ulist_fini);
 
@@ -123,6 +125,45 @@ void ulist_free(struct ulist *ulist)
 }
 EXPORT_SYMBOL(ulist_free);
 
+static struct ulist_node *ulist_rbtree_search(struct ulist *ulist, u64 val)
+{
+	struct rb_node *n = ulist->root.rb_node;
+	struct ulist_node *u = NULL;
+
+	while (n) {
+		u = rb_entry(n, struct ulist_node, rb_node);
+		if (u->val < val)
+			n = n->rb_right;
+		else if (u->val > val)
+			n = n->rb_left;
+		else
+			return u;
+	}
+	return NULL;
+}
+
+static int ulist_rbtree_insert(struct ulist *ulist, struct ulist_node *ins)
+{
+	struct rb_node **p = &ulist->root.rb_node;
+	struct rb_node *parent = NULL;
+	struct ulist_node *cur = NULL;
+
+	while (*p) {
+		parent = *p;
+		cur = rb_entry(parent, struct ulist_node, rb_node);
+
+		if (cur->val < ins->val)
+			p = &(*p)->rb_right;
+		else if (cur->val > ins->val)
+			p = &(*p)->rb_left;
+		else
+			return -EEXIST;
+	}
+	rb_link_node(&ins->rb_node, parent, p);
+	rb_insert_color(&ins->rb_node, &ulist->root);
+	return 0;
+}
+
 /**
  * ulist_add - add an element to the ulist
  * @ulist:	ulist to add the element to
@@ -151,14 +192,13 @@ int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask)
 int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
 		    u64 *old_aux, gfp_t gfp_mask)
 {
-	int i;
-
-	for (i = 0; i < ulist->nnodes; ++i) {
-		if (ulist->nodes[i].val == val) {
-			if (old_aux)
-				*old_aux = ulist->nodes[i].aux;
-			return 0;
-		}
+	int ret = 0;
+	struct ulist_node *node = NULL;
+	node = ulist_rbtree_search(ulist, val);
+	if (node) {
+		if (old_aux)
+			*old_aux = node->aux;
+		return 0;
 	}
 
 	if (ulist->nnodes >= ulist->nodes_alloced) {
@@ -187,6 +227,8 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
 	}
 	ulist->nodes[ulist->nnodes].val = val;
 	ulist->nodes[ulist->nnodes].aux = aux;
+	ret = ulist_rbtree_insert(ulist, &ulist->nodes[ulist->nnodes]);
+	BUG_ON(ret);
 	++ulist->nnodes;
 
 	return 1;
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
index 21a1963..fb36731 100644
--- a/fs/btrfs/ulist.h
+++ b/fs/btrfs/ulist.h
@@ -8,6 +8,9 @@
 #ifndef __ULIST__
 #define __ULIST__
 
+#include <linux/list.h>
+#include <linux/rbtree.h>
+
 /*
  * ulist is a generic data structure to hold a collection of unique u64
  * values. The only operations it supports is adding to the list and
@@ -34,6 +37,7 @@ struct ulist_iterator {
 struct ulist_node {
 	u64 val;		/* value to store */
 	u64 aux;		/* auxiliary value saved along with the val */
+	struct rb_node rb_node;	/* used to speed up search */
 };
 
 struct ulist {
@@ -54,6 +58,8 @@ struct ulist {
 	 */
 	struct ulist_node *nodes;
 
+	struct rb_root root;
+
 	/*
 	 * inline storage space for the first ULIST_SIZE entries
 	 */
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6789772..0e925ce 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -46,6 +46,7 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans,
 				struct btrfs_device *device);
 static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
 static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
+static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
 static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
 
 static DEFINE_MUTEX(uuid_mutex);
@@ -717,9 +718,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 		if (!device->name)
 			continue;
 
-		ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
-					    &bdev, &bh);
-		if (ret)
+		/* Just open everything we can; ignore failures here */
+		if (btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
+					    &bdev, &bh))
 			continue;
 
 		disk_super = (struct btrfs_super_block *)bh->b_data;
@@ -1199,10 +1200,10 @@ out:
 	return ret;
 }
 
-int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
-			   struct btrfs_device *device,
-			   u64 chunk_tree, u64 chunk_objectid,
-			   u64 chunk_offset, u64 start, u64 num_bytes)
+static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
+				  struct btrfs_device *device,
+				  u64 chunk_tree, u64 chunk_objectid,
+				  u64 chunk_offset, u64 start, u64 num_bytes)
 {
 	int ret;
 	struct btrfs_path *path;
@@ -1329,9 +1330,9 @@ error:
  * the device information is stored in the chunk root
  * the btrfs_device struct should be fully filled in
  */
-int btrfs_add_device(struct btrfs_trans_handle *trans,
-		     struct btrfs_root *root,
-		     struct btrfs_device *device)
+static int btrfs_add_device(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root,
+			    struct btrfs_device *device)
 {
 	int ret;
 	struct btrfs_path *path;
@@ -1710,8 +1711,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 }
 
-int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
-			      struct btrfs_device **device)
+static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
+				     struct btrfs_device **device)
 {
 	int ret = 0;
 	struct btrfs_super_block *disk_super;
@@ -3607,7 +3608,7 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
 	return 0;
 }
 
-struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
+static struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
 	[BTRFS_RAID_RAID10] = {
 		.sub_stripes	= 2,
 		.dev_stripes	= 1,
@@ -3674,18 +3675,10 @@ static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
 
 static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
 {
-	u64 features;
-
 	if (!(type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)))
 		return;
 
-	features = btrfs_super_incompat_flags(info->super_copy);
-	if (features & BTRFS_FEATURE_INCOMPAT_RAID56)
-		return;
-
-	features |= BTRFS_FEATURE_INCOMPAT_RAID56;
-	btrfs_set_super_incompat_flags(info->super_copy, features);
-	printk(KERN_INFO "btrfs: setting RAID5/6 feature flag\n");
+	btrfs_set_fs_incompat(info, RAID56);
 }
 
 static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
@@ -3932,7 +3925,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 
 	em_tree = &extent_root->fs_info->mapping_tree.map_tree;
 	write_lock(&em_tree->lock);
-	ret = add_extent_mapping(em_tree, em);
+	ret = add_extent_mapping(em_tree, em, 0);
 	write_unlock(&em_tree->lock);
 	if (ret) {
 		free_extent_map(em);
@@ -4240,9 +4233,25 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 	read_lock(&em_tree->lock);
 	em = lookup_extent_mapping(em_tree, logical, len);
 	read_unlock(&em_tree->lock);
-	BUG_ON(!em);
 
-	BUG_ON(em->start > logical || em->start + em->len < logical);
+	/*
+	 * We could return errors for these cases, but that could get ugly and
+	 * we'd probably do the same thing which is just not do anything else
+	 * and exit, so return 1 so the callers don't try to use other copies.
+	 */
+	if (!em) {
+		btrfs_emerg(fs_info, "No mapping for %Lu-%Lu\n", logical,
+			    logical+len);
+		return 1;
+	}
+
+	if (em->start > logical || em->start + em->len < logical) {
+		btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got "
+			    "%Lu-%Lu\n", logical, logical+len, em->start,
+			    em->start + em->len);
+		return 1;
+	}
+
 	map = (struct map_lookup *)em->bdev;
 	if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
 		ret = map->num_stripes;
@@ -4411,13 +4420,19 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 	read_unlock(&em_tree->lock);
 
 	if (!em) {
-		printk(KERN_CRIT "btrfs: unable to find logical %llu len %llu\n",
-		       (unsigned long long)logical,
-		       (unsigned long long)*length);
-		BUG();
+		btrfs_crit(fs_info, "unable to find logical %llu len %llu",
+			(unsigned long long)logical,
+			(unsigned long long)*length);
+		return -EINVAL;
+	}
+
+	if (em->start > logical || em->start + em->len < logical) {
+		btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, "
+			   "found %Lu-%Lu\n", logical, em->start,
+			   em->start + em->len);
+		return -EINVAL;
 	}
 
-	BUG_ON(em->start > logical || em->start + em->len < logical);
 	map = (struct map_lookup *)em->bdev;
 	offset = logical - em->start;
 
@@ -5106,9 +5121,9 @@ struct async_sched {
  * This will add one bio to the pending list for a device and make sure
  * the work struct is scheduled.
  */
-noinline void btrfs_schedule_bio(struct btrfs_root *root,
-				 struct btrfs_device *device,
-				 int rw, struct bio *bio)
+static noinline void btrfs_schedule_bio(struct btrfs_root *root,
+					struct btrfs_device *device,
+					int rw, struct bio *bio)
 {
 	int should_queue = 1;
 	struct btrfs_pending_bios *pending_bios;
@@ -5308,10 +5323,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
 	}
 
 	if (map_length < length) {
-		printk(KERN_CRIT "btrfs: mapping failed logical %llu bio len %llu "
-		       "len %llu\n", (unsigned long long)logical,
-		       (unsigned long long)length,
-		       (unsigned long long)map_length);
+		btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu",
+			(unsigned long long)logical,
+			(unsigned long long)length,
+			(unsigned long long)map_length);
 		BUG();
 	}
 
@@ -5476,7 +5491,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
 	}
 
 	write_lock(&map_tree->map_tree.lock);
-	ret = add_extent_mapping(&map_tree->map_tree, em);
+	ret = add_extent_mapping(&map_tree->map_tree, em, 0);
 	write_unlock(&map_tree->map_tree.lock);
 	BUG_ON(ret); /* Tree corruption */
 	free_extent_map(em);
@@ -5583,8 +5598,8 @@ static int read_one_dev(struct btrfs_root *root,
 			return -EIO;
 
 		if (!device) {
-			printk(KERN_WARNING "warning devid %llu missing\n",
-			       (unsigned long long)devid);
+			btrfs_warn(root->fs_info, "devid %llu missing",
+				(unsigned long long)devid);
 			device = add_missing_dev(root, devid, dev_uuid);
 			if (!device)
 				return -ENOMEM;
@@ -5926,7 +5941,7 @@ void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
 	btrfs_dev_stat_print_on_error(dev);
 }
 
-void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
+static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
 {
 	if (!dev->dev_stats_valid)
 		return;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 062d860..845ccbb 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -254,10 +254,6 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
 #define btrfs_bio_size(n) (sizeof(struct btrfs_bio) + \
 			    (sizeof(struct btrfs_bio_stripe) * (n)))
 
-int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
-			   struct btrfs_device *device,
-			   u64 chunk_tree, u64 chunk_objectid,
-			   u64 chunk_offset, u64 start, u64 num_bytes);
 int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 		    u64 logical, u64 *length,
 		    struct btrfs_bio **bbio_ret, int mirror_num);
@@ -282,11 +278,6 @@ void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
 int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
 					 char *device_path,
 					 struct btrfs_device **device);
-int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
-			      struct btrfs_device **device);
-int btrfs_add_device(struct btrfs_trans_handle *trans,
-		     struct btrfs_root *root,
-		     struct btrfs_device *device);
 int btrfs_rm_device(struct btrfs_root *root, char *device_path);
 void btrfs_cleanup_fs_uuids(void);
 int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
@@ -307,7 +298,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
 int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
 int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
 			 u64 *start, u64 *max_avail);
-void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
 void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
 int btrfs_get_dev_stats(struct btrfs_root *root,
 			struct btrfs_ioctl_get_dev_stats *stats);
@@ -321,9 +311,6 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
 					      struct btrfs_device *tgtdev);
 int btrfs_scratch_superblock(struct btrfs_device *device);
-void btrfs_schedule_bio(struct btrfs_root *root,
-			struct btrfs_device *device,
-			int rw, struct bio *bio);
 int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
 			   u64 logical, u64 len, int mirror_num);
 unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 446a684..05740b9 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -406,8 +406,8 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
 				XATTR_REPLACE);
 }
 
-int btrfs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
-		     void *fs_info)
+static int btrfs_initxattrs(struct inode *inode,
+			    const struct xattr *xattr_array, void *fs_info)
 {
 	const struct xattr *xattr;
 	struct btrfs_trans_handle *trans = fs_info;
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 6a16053..09fe622 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -232,17 +232,10 @@ int ecryptfs_read_lower(char *data, loff_t offset, size_t size,
 			struct inode *ecryptfs_inode)
 {
 	struct file *lower_file;
-	mm_segment_t fs_save;
-	ssize_t rc;
-
 	lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file;
 	if (!lower_file)
 		return -EIO;
-	fs_save = get_fs();
-	set_fs(get_ds());
-	rc = vfs_read(lower_file, data, size, &offset);
-	set_fs(fs_save);
-	return rc;
+	return kernel_read(lower_file, offset, data, size);
 }
 
 /**
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index cc33aaf..399e8ce 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -69,6 +69,19 @@ config XFS_RT
 
 	  If unsure, say N.
 
+config XFS_WARN
+	bool "XFS Verbose Warnings"
+	depends on XFS_FS && !XFS_DEBUG
+	help
+	  Say Y here to get an XFS build with many additional warnings.
+	  It converts ASSERT checks to WARN, so will log any out-of-bounds
+	  conditions that occur that would otherwise be missed. It is much
+	  lighter weight than XFS_DEBUG and does not modify algorithms and will
+	  not cause the kernel to panic on non-fatal errors.
+
+	  However, similar to XFS_DEBUG, it is only advisable to use this if you
+	  are debugging a particular problem.
+
 config XFS_DEBUG
 	bool "XFS Debugging support"
 	depends on XFS_FS
diff --git a/fs/xfs/mrlock.h b/fs/xfs/mrlock.h
index ff6a198..e3c92d1 100644
--- a/fs/xfs/mrlock.h
+++ b/fs/xfs/mrlock.h
@@ -22,12 +22,12 @@
 
 typedef struct {
 	struct rw_semaphore	mr_lock;
-#ifdef DEBUG
+#if defined(DEBUG) || defined(XFS_WARN)
 	int			mr_writer;
 #endif
 } mrlock_t;
 
-#ifdef DEBUG
+#if defined(DEBUG) || defined(XFS_WARN)
 #define mrinit(mrp, name)	\
 	do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
 #else
@@ -46,7 +46,7 @@ static inline void mraccess_nested(mrlock_t *mrp, int subclass)
 static inline void mrupdate_nested(mrlock_t *mrp, int subclass)
 {
 	down_write_nested(&mrp->mr_lock, subclass);
-#ifdef DEBUG
+#if defined(DEBUG) || defined(XFS_WARN)
 	mrp->mr_writer = 1;
 #endif
 }
@@ -60,7 +60,7 @@ static inline int mrtryupdate(mrlock_t *mrp)
 {
 	if (!down_write_trylock(&mrp->mr_lock))
 		return 0;
-#ifdef DEBUG
+#if defined(DEBUG) || defined(XFS_WARN)
 	mrp->mr_writer = 1;
 #endif
 	return 1;
@@ -68,7 +68,7 @@ static inline int mrtryupdate(mrlock_t *mrp)
 
 static inline void mrunlock_excl(mrlock_t *mrp)
 {
-#ifdef DEBUG
+#if defined(DEBUG) || defined(XFS_WARN)
 	mrp->mr_writer = 0;
 #endif
 	up_write(&mrp->mr_lock);
@@ -81,7 +81,7 @@ static inline void mrunlock_shared(mrlock_t *mrp)
 
 static inline void mrdemote(mrlock_t *mrp)
 {
-#ifdef DEBUG
+#if defined(DEBUG) || defined(XFS_WARN)
 	mrp->mr_writer = 0;
 #endif
 	downgrade_write(&mrp->mr_lock);
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index d8b11b7..a742c47 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -24,6 +24,11 @@
 #define XFS_BUF_LOCK_TRACKING 1
 #endif
 
+#ifdef CONFIG_XFS_WARN
+#define XFS_WARN 1
+#endif
+
+
 #include "xfs_linux.h"
 
 #endif	/* __XFS_H__ */
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 30c4c14..cafc902 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -386,7 +386,7 @@ const struct xfs_buf_ops xfs_allocbt_buf_ops = {
 };
 
 
-#ifdef DEBUG
+#if defined(DEBUG) || defined(XFS_WARN)
 STATIC int
 xfs_allocbt_keys_inorder(
 	struct xfs_btree_cur	*cur,
@@ -442,7 +442,7 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
 	.init_ptr_from_cur	= xfs_allocbt_init_ptr_from_cur,
 	.key_diff		= xfs_allocbt_key_diff,
 	.buf_ops		= &xfs_allocbt_buf_ops,
-#ifdef DEBUG
+#if defined(DEBUG) || defined(XFS_WARN)
 	.keys_inorder		= xfs_allocbt_keys_inorder,
 	.recs_inorder		= xfs_allocbt_recs_inorder,
 #endif
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 3a86c3f..0c61a22 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -813,7 +813,7 @@ const struct xfs_buf_ops xfs_bmbt_buf_ops = {
 };
 
 
-#ifdef DEBUG
+#if defined(DEBUG) || defined(XFS_WARN)
 STATIC int
 xfs_bmbt_keys_inorder(
 	struct xfs_btree_cur	*cur,
@@ -853,7 +853,7 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
 	.init_ptr_from_cur	= xfs_bmbt_init_ptr_from_cur,
 	.key_diff		= xfs_bmbt_key_diff,
 	.buf_ops		= &xfs_bmbt_buf_ops,
-#ifdef DEBUG
+#if defined(DEBUG) || defined(XFS_WARN)
 	.keys_inorder		= xfs_bmbt_keys_inorder,
 	.recs_inorder		= xfs_bmbt_recs_inorder,
 #endif
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 6e6c915..55e3c7c 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -215,7 +215,7 @@ struct xfs_btree_ops {
 
 	const struct xfs_buf_ops	*buf_ops;
 
-#ifdef DEBUG
+#if defined(DEBUG) || defined(XFS_WARN)
 	/* check that k1 is lower than k2 */
 	int	(*keys_inorder)(struct xfs_btree_cur *cur,
 				union xfs_btree_key *k1,
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index ecc6c66..5246de4 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -993,7 +993,7 @@ xfs_dir2_leafn_rebalance(
 	xfs_dir2_leaf_t		*leaf1;		/* first leaf structure */
 	xfs_dir2_leaf_t		*leaf2;		/* second leaf structure */
 	int			mid;		/* midpoint leaf index */
-#ifdef DEBUG
+#if defined(DEBUG) || defined(XFS_WARN)
 	int			oldstale;	/* old count of stale leaves */
 #endif
 	int			oldsum;		/* old total leaf count */
@@ -1022,7 +1022,7 @@ xfs_dir2_leafn_rebalance(
 	ents2 = xfs_dir3_leaf_ents_p(leaf2);
 
 	oldsum = hdr1.count + hdr2.count;
-#ifdef DEBUG
+#if defined(DEBUG) || defined(XFS_WARN)
 	oldstale = hdr1.stale + hdr2.stale;
 #endif
 	mid = oldsum >> 1;
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index c82ac88..5448eb6 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -272,7 +272,7 @@ const struct xfs_buf_ops xfs_inobt_buf_ops = {
 	.verify_write = xfs_inobt_write_verify,
 };
 
-#ifdef DEBUG
+#if defined(DEBUG) || defined(XFS_WARN)
 STATIC int
 xfs_inobt_keys_inorder(
 	struct xfs_btree_cur	*cur,
@@ -310,7 +310,7 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
 	.init_ptr_from_cur	= xfs_inobt_init_ptr_from_cur,
 	.key_diff		= xfs_inobt_key_diff,
 	.buf_ops		= &xfs_inobt_buf_ops,
-#ifdef DEBUG
+#if defined(DEBUG) || defined(XFS_WARN)
 	.keys_inorder		= xfs_inobt_keys_inorder,
 	.recs_inorder		= xfs_inobt_recs_inorder,
 #endif
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 558ef49..efbe1ac 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -287,7 +287,7 @@ xfs_ilock_demote(
 	trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
 }
 
-#ifdef DEBUG
+#if defined(DEBUG) || defined(XFS_WARN)
 int
 xfs_isilocked(
 	xfs_inode_t		*ip,
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index d681e34..5e99968 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -422,9 +422,12 @@ xfs_attrlist_by_handle(
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
-	kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL);
-	if (!kbuf)
-		goto out_dput;
+	kbuf = kmem_zalloc(al_hreq.buflen, KM_SLEEP | KM_MAYFAIL);
+	if (!kbuf) {
+		kbuf = kmem_zalloc_large(al_hreq.buflen);
+		if (!kbuf)
+			goto out_dput;
+	}
 
 	cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
 	error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
@@ -436,7 +439,10 @@ xfs_attrlist_by_handle(
 		error = -EFAULT;
 
  out_kfree:
-	kfree(kbuf);
+	if (is_vmalloc_addr(kbuf))
+		kmem_free_large(kbuf);
+	else
+		kmem_free(kbuf);
  out_dput:
 	dput(dentry);
 	return error;
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 63b8fc4..c0c6625 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -373,9 +373,12 @@ xfs_compat_attrlist_by_handle(
 		return PTR_ERR(dentry);
 
 	error = -ENOMEM;
-	kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
-	if (!kbuf)
-		goto out_dput;
+	kbuf = kmem_zalloc(al_hreq.buflen, KM_SLEEP | KM_MAYFAIL);
+	if (!kbuf) {
+		kbuf = kmem_zalloc_large(al_hreq.buflen);
+		if (!kbuf)
+			goto out_dput;
+	}
 
 	cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
 	error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
@@ -387,7 +390,10 @@ xfs_compat_attrlist_by_handle(
 		error = -EFAULT;
 
  out_kfree:
-	kfree(kbuf);
+	if (is_vmalloc_addr(kbuf))
+		kmem_free_large(kbuf);
+	else
+		kmem_free(kbuf);
  out_dput:
 	dput(dentry);
 	return error;
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 14e59d9..800f896 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -293,22 +293,34 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
 #define ASSERT_ALWAYS(expr)	\
 	(unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
 
-#ifndef DEBUG
-#define ASSERT(expr)	((void)0)
+#ifdef DEBUG
+#define ASSERT(expr)	\
+	(unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
 
 #ifndef STATIC
-# define STATIC static noinline
+# define STATIC noinline
 #endif
 
-#else /* DEBUG */
+#else	/* !DEBUG */
+
+#ifdef XFS_WARN
 
 #define ASSERT(expr)	\
-	(unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+	(unlikely(expr) ? (void)0 : asswarn(#expr, __FILE__, __LINE__))
 
 #ifndef STATIC
-# define STATIC noinline
+# define STATIC static noinline
+#endif
+
+#else	/* !DEBUG && !XFS_WARN */
+
+#define ASSERT(expr)	((void)0)
+
+#ifndef STATIC
+# define STATIC static noinline
 #endif
 
+#endif /* XFS_WARN */
 #endif /* DEBUG */
 
 #endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index 331cd9f..9163dc1 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -93,6 +93,14 @@ xfs_alert_tag(
 }
 
 void
+asswarn(char *expr, char *file, int line)
+{
+	xfs_warn(NULL, "Assertion failed: %s, file: %s, line: %d",
+		expr, file, line);
+	WARN_ON(1);
+}
+
+void
 assfail(char *expr, char *file, int line)
 {
 	xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h
index 76c8198..8540115 100644
--- a/fs/xfs/xfs_message.h
+++ b/fs/xfs/xfs_message.h
@@ -57,6 +57,7 @@ do {									\
 	xfs_printk_ratelimited(xfs_debug, dev, fmt, ##__VA_ARGS__)
 
 extern void assfail(char *expr, char *f, int l);
+extern void asswarn(char *expr, char *f, int l);
 
 extern void xfs_hex_dump(void *p, int length);
 
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index cd29f61..a44dba5 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -405,7 +405,7 @@ typedef struct xfs_trans {
 	int64_t			t_res_fdblocks_delta; /* on-disk only chg */
 	int64_t			t_frextents_delta;/* superblock freextents chg*/
 	int64_t			t_res_frextents_delta; /* on-disk only chg */
-#ifdef DEBUG
+#if defined(DEBUG) || defined(XFS_WARN)
 	int64_t			t_ag_freeblks_delta; /* debugging counter */
 	int64_t			t_ag_flist_delta; /* debugging counter */
 	int64_t			t_ag_btree_delta; /* debugging counter */
@@ -433,7 +433,7 @@ typedef struct xfs_trans {
 #define	xfs_trans_get_block_res(tp)	((tp)->t_blk_res)
 #define	xfs_trans_set_sync(tp)		((tp)->t_flags |= XFS_TRANS_SYNC)
 
-#ifdef DEBUG
+#if defined(DEBUG) || defined(XFS_WARN)
 #define	xfs_trans_agblocks_delta(tp, d)	((tp)->t_ag_freeblks_delta += (int64_t)d)
 #define	xfs_trans_agflist_delta(tp, d)	((tp)->t_ag_flist_delta += (int64_t)d)
 #define	xfs_trans_agbtree_delta(tp, d)	((tp)->t_ag_btree_delta += (int64_t)d)
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index fa3a5f9..5ef0df5 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -376,12 +376,18 @@ struct btrfs_ioctl_get_dev_stats {
 
 #define BTRFS_QUOTA_CTL_ENABLE	1
 #define BTRFS_QUOTA_CTL_DISABLE	2
-#define BTRFS_QUOTA_CTL_RESCAN	3
+#define BTRFS_QUOTA_CTL_RESCAN__NOTUSED	3
 struct btrfs_ioctl_quota_ctl_args {
 	__u64 cmd;
 	__u64 status;
 };
 
+struct btrfs_ioctl_quota_rescan_args {
+	__u64	flags;
+	__u64   progress;
+	__u64   reserved[6];
+};
+
 struct btrfs_ioctl_qgroup_assign_args {
 	__u64 assign;
 	__u64 src;
@@ -412,7 +418,25 @@ struct btrfs_ioctl_received_subvol_args {
  * search of clone sources doesn't find an extent. UPDATE_EXTENT
  * commands will be sent instead of WRITE commands.
  */
-#define BTRFS_SEND_FLAG_NO_FILE_DATA     0x1
+#define BTRFS_SEND_FLAG_NO_FILE_DATA		0x1
+
+/*
+ * Do not add the leading stream header. Used when multiple snapshots
+ * are sent back to back.
+ */
+#define BTRFS_SEND_FLAG_OMIT_STREAM_HEADER	0x2
+
+/*
+ * Omit the command at the end of the stream that indicated the end
+ * of the stream. This option is used when multiple snapshots are
+ * sent back to back.
+ */
+#define BTRFS_SEND_FLAG_OMIT_END_CMD		0x4
+
+#define BTRFS_SEND_FLAG_MASK \
+	(BTRFS_SEND_FLAG_NO_FILE_DATA | \
+	 BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \
+	 BTRFS_SEND_FLAG_OMIT_END_CMD)
 
 struct btrfs_ioctl_send_args {
 	__s64 send_fd;			/* in */
@@ -502,6 +526,10 @@ struct btrfs_ioctl_send_args {
 			       struct btrfs_ioctl_qgroup_create_args)
 #define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
 			       struct btrfs_ioctl_qgroup_limit_args)
+#define BTRFS_IOC_QUOTA_RESCAN _IOW(BTRFS_IOCTL_MAGIC, 44, \
+			       struct btrfs_ioctl_quota_rescan_args)
+#define BTRFS_IOC_QUOTA_RESCAN_STATUS _IOR(BTRFS_IOCTL_MAGIC, 45, \
+			       struct btrfs_ioctl_quota_rescan_args)
 #define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \
 				   char[BTRFS_LABEL_SIZE])
 #define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \
diff --git a/ipc/sem.c b/ipc/sem.c
index 899b598..a7e40ed 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -796,6 +796,13 @@ static int count_semncnt (struct sem_array * sma, ushort semnum)
 	struct sem_queue * q;
 
 	semncnt = 0;
+	list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) {
+		struct sembuf * sops = q->sops;
+		BUG_ON(sops->sem_num != semnum);
+		if ((sops->sem_op < 0) && !(sops->sem_flg & IPC_NOWAIT))
+			semncnt++;
+	}
+
 	list_for_each_entry(q, &sma->sem_pending, list) {
 		struct sembuf * sops = q->sops;
 		int nsops = q->nsops;
@@ -815,6 +822,13 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
 	struct sem_queue * q;
 
 	semzcnt = 0;
+	list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) {
+		struct sembuf * sops = q->sops;
+		BUG_ON(sops->sem_num != semnum);
+		if ((sops->sem_op == 0) && !(sops->sem_flg & IPC_NOWAIT))
+			semzcnt++;
+	}
+
 	list_for_each_entry(q, &sma->sem_pending, list) {
 		struct sembuf * sops = q->sops;
 		int nsops = q->nsops;
diff --git a/ipc/shm.c b/ipc/shm.c
index 34af1fe..7e199fa 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -493,7 +493,13 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	if (shmflg & SHM_HUGETLB) {
 		struct hstate *hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT)
 						& SHM_HUGE_MASK);
-		size_t hugesize = ALIGN(size, huge_page_size(hs));
+		size_t hugesize;
+
+		if (!hs) {
+			error = -EINVAL;
+			goto no_file;
+		}
+		hugesize = ALIGN(size, huge_page_size(hs));
 
 		/* hugetlb_file_setup applies strict accounting */
 		if (shmflg & SHM_NORESERVE)
diff --git a/mm/mmap.c b/mm/mmap.c
index 1ae21d6..f681e18 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1367,9 +1367,13 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
 			len = ALIGN(len, huge_page_size(hstate_file(file)));
 	} else if (flags & MAP_HUGETLB) {
 		struct user_struct *user = NULL;
+		struct hstate *hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) &
+						   SHM_HUGE_MASK);
 
-		len = ALIGN(len, huge_page_size(hstate_sizelog(
-			(flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK)));
+		if (!hs)
+			return -EINVAL;
+
+		len = ALIGN(len, huge_page_size(hs));
 		/*
 		 * VM_NORESERVE is used because the reservations will be
 		 * taken when vm_ops->mmap() is called