334 files changed, 11522 insertions, 6914 deletions
diff --git a/Documentation/RCU/listRCU.txt b/Documentation/RCU/listRCU.txt
index 1fd1753..4349c14 100644
--- a/Documentation/RCU/listRCU.txt
+++ b/Documentation/RCU/listRCU.txt
@@ -118,7 +118,7 @@ Following are the RCU equivalents for these two functions:
 		list_for_each_entry(e, list, list) {
 			if (!audit_compare_rule(rule, &e->rule)) {
 				list_del_rcu(&e->list);
-				call_rcu(&e->rcu, audit_free_rule, e);
+				call_rcu(&e->rcu, audit_free_rule);
 				return 0;
 			}
 		}
@@ -206,7 +206,7 @@ RCU ("read-copy update") its name.  The RCU code is as follows:
 				ne->rule.action = newaction;
 				ne->rule.file_count = newfield_count;
 				list_replace_rcu(e, ne);
-				call_rcu(&e->rcu, audit_free_rule, e);
+				call_rcu(&e->rcu, audit_free_rule);
 				return 0;
 			}
 		}
@@ -283,7 +283,7 @@ flag under the spinlock as follows:
 				list_del_rcu(&e->list);
 				e->deleted = 1;
 				spin_unlock(&e->lock);
-				call_rcu(&e->rcu, audit_free_rule, e);
+				call_rcu(&e->rcu, audit_free_rule);
 				return 0;
 			}
 		}
diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt
index 95821a2..7aa2002 100644
--- a/Documentation/RCU/rcu.txt
+++ b/Documentation/RCU/rcu.txt
@@ -81,7 +81,7 @@ o	I hear that RCU needs work in order to support realtime kernels?
 	This work is largely completed.  Realtime-friendly RCU can be
 	enabled via the CONFIG_PREEMPT_RCU kernel configuration parameter.
 	However, work is in progress for enabling priority boosting of
-	preempted RCU read-side critical sections.This is needed if you
+	preempted RCU read-side critical sections.  This is needed if you
 	have CPU-bound realtime threads.
 
 o	Where can I find more information on RCU?
diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt
index 239f542..6389dec 100644
--- a/Documentation/RCU/rculist_nulls.txt
+++ b/Documentation/RCU/rculist_nulls.txt
@@ -21,7 +21,7 @@ if (obj) {
   /*
    * Because a writer could delete object, and a writer could
    * reuse these object before the RCU grace period, we
-   * must check key after geting the reference on object
+   * must check key after getting the reference on object
    */
   if (obj->key != key) { // not the object we expected
      put_ref(obj);
@@ -117,7 +117,7 @@ a race (some writer did a delete and/or a move of an object
 to another chain) checking the final 'nulls' value if
 the lookup met the end of chain. If final 'nulls' value
 is not the slot number, then we must restart the lookup at
-the begining. If the object was moved to same chain,
+the beginning. If the object was moved to the same chain,
 then the reader doesnt care : It might eventually
 scan the list again without harm.
 
diff --git a/Documentation/filesystems/exofs.txt b/Documentation/filesystems/exofs.txt
new file mode 100644
index 0000000..0ced74c
--- /dev/null
+++ b/Documentation/filesystems/exofs.txt
@@ -0,0 +1,176 @@
+===============================================================================
+WHAT IS EXOFS?
+===============================================================================
+
+exofs is a file system that uses an OSD and exports the API of a normal Linux
+file system. Users access exofs like any other local file system, and exofs
+will in turn issue commands to the local OSD initiator.
+
+OSD is a new T10 command set that views storage devices not as a large/flat
+array of sectors but as a container of objects, each having a length, quota,
+time attributes and more. Each object is addressed by a 64bit ID, and is
+contained in a 64bit ID partition. Each object has associated attributes
+attached to it, which are an integral part of the object and provide metadata
+about the object. The standard defines some common obligatory attributes, but
+user attributes can be added as needed.
+
+===============================================================================
+ENVIRONMENT
+===============================================================================
+
+To use this file system, you need to have an object store to run it on.  You
+may download a target from:
+http://open-osd.org
+
+See Documentation/scsi/osd.txt for how to set up a working osd environment.
+
+===============================================================================
+USAGE
+===============================================================================
+
+1. Download and compile exofs and open-osd initiator:
+  You need an external Kernel source tree or kernel headers from your
+  distribution. (anything based on 2.6.26 or later).
+
+  a. download open-osd including exofs source using:
+     [parent-directory]$ git clone git://git.open-osd.org/open-osd.git
+
+  b. Build the library module like this:
+     [parent-directory]$ make -C KSRC=$(KER_DIR) open-osd
+
+     This will build both the open-osd initiator as well as the exofs kernel
+     module. Use whatever parameters you compiled your Kernel with and
+     $(KER_DIR) above pointing to the Kernel you compile against. See the file
+     open-osd/top-level-Makefile for an example.
+
+2. Get the OSD initiator and target set up properly, and login to the target.
+  See Documentation/scsi/osd.txt for further instructions. Also see ./do-osd
+  for an example script that does all these steps.
+
+3. Insmod the exofs.ko module:
+   [exofs]$ insmod exofs.ko
+
+4. Make sure the directory where you want to mount exists. If not, create it.
+   (For example, mkdir /mnt/exofs)
+
+5. At first run you will need to invoke the mkfs.exofs application
+
+   As an example, this will create the file system on:
+   /dev/osd0 partition ID 65536
+
+   mkfs.exofs --pid=65536 --format /dev/osd0
+
+   The --format is optional. If not specified, no OSD_FORMAT will be
+   performed and a clean file system will be created in the specified pid,
+   in the available space of the target. (Use --format=size_in_meg to limit
+   the total LUN space available)
+
+   If pid already exists it will be deleted and a new one will be created in its
+   place. Be careful.
+
+   An exofs lives inside a single OSD partition. You can create multiple exofs
+   filesystems on the same device using multiple pids.
+
+   (run mkfs.exofs without any parameters for usage help message)
+
+6. Mount the file system.
+
+   For example, to mount /dev/osd0, partition ID 0x10000 on /mnt/exofs:
+
+	mount -t exofs -o pid=65536 /dev/osd0 /mnt/exofs/
+
+7. For reference (See do-exofs example script):
+	do-exofs start - an example of how to perform the above steps.
+	do-exofs stop -  an example of how to unmount the file system.
+	do-exofs format - an example of how to format and mkfs a new exofs.
+
+8. Extra compilation flags (uncomment in fs/exofs/Kbuild):
+	CONFIG_EXOFS_DEBUG - for debug messages and extra checks.
+
+===============================================================================
+exofs mount options
+===============================================================================
+Similar to any mount command:
+	mount -t exofs -o exofs_options /dev/osdX mount_exofs_directory
+
+Where:
+    -t exofs: specifies the exofs file system
+
+    /dev/osdX: X is a decimal number. /dev/osdX was created after a successful
+               login into an OSD target.
+
+    mount_exofs_directory: The directory to mount the file system on
+
+    exofs specific options: Options are separated by commas (,)
+		pid=<integer> - The partition number to mount/create as
+                                container of the filesystem.
+                                This option is mandatory
+                to=<integer>  - Timeout in ticks for a single command
+                                default is (60 * HZ) [for debugging only]
+
+===============================================================================
+DESIGN
+===============================================================================
+
+* The file system control block (AKA on-disk superblock) resides in an object
+  with a special ID (defined in common.h).
+  Information included in the file system control block is used to fill the
+  in-memory superblock structure at mount time. This object is created before
+  the file system is used by mkexofs.c. It contains information such as:
+	- The file system's magic number
+	- The next inode number to be allocated
+
+* Each file resides in its own object and contains the data (and it will be
+  possible to extend the file over multiple objects, though this has not been
+  implemented yet).
+
+* A directory is treated as a file, and essentially contains a list of <file
+  name, inode #> pairs for files that are found in that directory. The object
+  IDs correspond to the files' inode numbers and will be allocated according to
+  a bitmap (stored in a separate object). Now they are allocated using a
+  counter.
+
+* Each file's control block (AKA on-disk inode) is stored in its object's
+  attributes. This applies to both regular files and other types (directories,
+  device files, symlinks, etc.).
+
+* Credentials are generated per object (inode and superblock) when they are
+  created in memory (read off disk or created). The credential works for all
+  operations and is used as long as the object remains in memory.
+
+* Async OSD operations are used whenever possible, but the target may execute
+  them out of order. The operations that concern us are create, delete,
+  readpage, writepage, update_inode, and truncate. The following pairs of
+  operations should execute in the order written, and we need to prevent them
+  from executing in reverse order:
+	- The following are handled with the OBJ_CREATED and OBJ_2BCREATED
+	  flags. OBJ_CREATED is set when we know the object exists on the OSD -
+	  in create's callback function, and when we successfully do a read_inode.
+	  OBJ_2BCREATED is set in the beginning of the create function, so we
+	  know that we should wait.
+		- create/delete: delete should wait until the object is created
+		  on the OSD.
+		- create/readpage: readpage should be able to return a page
+		  full of zeroes in this case. If there was a write already
+		  en-route (i.e. create, writepage, readpage) then the page
+		  would be locked, and so it would really be the same as
+		  create/writepage.
+		- create/writepage: if writepage is called for a sync write, it
+		  should wait until the object is created on the OSD.
+		  Otherwise, it should just return.
+		- create/truncate: truncate should wait until the object is
+		  created on the OSD.
+		- create/update_inode: update_inode should wait until the
+		  object is created on the OSD.
+	- Handled by VFS locks:
+		- readpage/delete: shouldn't happen because of page lock.
+		- writepage/delete: shouldn't happen because of page lock.
+		- readpage/writepage: shouldn't happen because of page lock.
+
+===============================================================================
+LICENSE/COPYRIGHT
+===============================================================================
+The exofs file system is based on ext2 v0.5b (distributed with the Linux kernel
+version 2.6.10).  All files include the original copyrights, and the license
+is GPL version 2 (only version 2, as is true for the Linux kernel).  The
+Linux kernel can be downloaded from www.kernel.org.
diff --git a/Documentation/filesystems/udf.txt b/Documentation/filesystems/udf.txt
index fde829a..902b95d 100644
--- a/Documentation/filesystems/udf.txt
+++ b/Documentation/filesystems/udf.txt
@@ -24,6 +24,8 @@ The following mount options are supported:
 
 	gid=		Set the default group.
 	umask=		Set the default umask.
+	mode=		Set the default file permissions.
+	dmode=		Set the default directory permissions.
 	uid=		Set the default user.
 	bs=		Set the block size.
 	unhide		Show otherwise hidden files.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 240257d..bdc0c43 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1523,7 +1523,9 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	noclflush	[BUGS=X86] Don't use the CLFLUSH instruction
 
-	nohlt		[BUGS=ARM,SH]
+	nohlt		[BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
+			wfi(ARM) instruction doesn't work correctly and not to
+			use it. This is also useful when using a JTAG debugger.
 
 	no-hlt		[BUGS=X86-32] Tells the kernel that the hlt
 			instruction doesn't work correctly and not to
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 1da9d1b..4edd39e 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -164,15 +164,19 @@ All md devices contain:
   raid_disks
      a text file with a simple number indicating the number of devices
      in a fully functional array.  If this is not yet known, the file
-     will be empty.  If an array is being resized (not currently
-     possible) this will contain the larger of the old and new sizes.
-     Some raid level (RAID1) allow this value to be set while the
-     array is active.  This will reconfigure the array.   Otherwise
-     it can only be set while assembling an array.
+     will be empty.  If an array is being resized this will contain
+     the new number of devices.
+     Some raid levels allow this value to be set while the array is
+     active.  This will reconfigure the array.   Otherwise it can only
+     be set while assembling an array.
+     A change to this attribute will not be permitted if it would
+     reduce the size of the array.  To reduce the number of drives
+     in an e.g. raid5, the array size must first be reduced by
+     setting the 'array_size' attribute.
 
   chunk_size
-     This is the size if bytes for 'chunks' and is only relevant to
-     raid levels that involve striping (1,4,5,6,10). The address space
+     This is the size in bytes for 'chunks' and is only relevant to
+     raid levels that involve striping (0,4,5,6,10). The address space
      of the array is conceptually divided into chunks and consecutive
      chunks are striped onto neighbouring devices.
      The size should be at least PAGE_SIZE (4k) and should be a power
@@ -183,6 +187,20 @@ All md devices contain:
      simply a number that is interpretted differently by different
      levels.  It can be written while assembling an array.
 
+  array_size
+     This can be used to artificially constrain the available space in
+     the array to be less than is actually available on the combined
+     devices.  Writing a number (in Kilobytes) which is less than
+     the available size will set the size.  Any reconfiguration of the
+     array (e.g. adding devices) will not cause the size to change.
+     Writing the word 'default' will cause the effective size of the
+     array to be whatever size is actually available based on
+     'level', 'chunk_size' and 'component_size'.
+
+     This can be used to reduce the size of the array before reducing
+     the number of devices in a raid4/5/6, or to support external
+     metadata formats which mandate such clipping.
+
   reshape_position
      This is either "none" or a sector number within the devices of
      the array where "reshape" is up to.  If this is set, the three
@@ -207,6 +225,11 @@ All md devices contain:
      about the array.  It can be 0.90 (traditional format), 1.0, 1.1,
      1.2 (newer format in varying locations) or "none" indicating that
      the kernel isn't managing metadata at all.
+     Alternately it can be "external:" followed by a string which
+     is set by user-space.  This indicates that metadata is managed
+     by a user-space program.  Any device failure or other event that
+     requires a metadata update will cause array activity to be
+     suspended until the event is acknowledged.
 
   resync_start
      The point at which resync should start.  If no resync is needed,
diff --git a/MAINTAINERS b/MAINTAINERS
index ebaf77e..9082266 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4969,7 +4969,8 @@ S:	Supported
 
 XFS FILESYSTEM
 P:	Silicon Graphics Inc
-P:	Bill O'Donnell
+P:	Felix Blyakher
+M:	felixb@sgi.com
 M:	xfs-masters@oss.sgi.com
 L:	xfs@oss.sgi.com
 W:	http://oss.sgi.com/projects/xfs
diff --git a/arch/arm/configs/omap_ldp_defconfig b/arch/arm/configs/omap_ldp_defconfig
index aa9d34f..679a4a3 100644
--- a/arch/arm/configs/omap_ldp_defconfig
+++ b/arch/arm/configs/omap_ldp_defconfig
@@ -474,14 +474,34 @@ CONFIG_NETDEVICES=y
 # CONFIG_EQUALIZER is not set
 # CONFIG_TUN is not set
 # CONFIG_VETH is not set
-# CONFIG_PHYLIB is not set
+CONFIG_PHYLIB=y
+
+#
+# MII PHY device drivers
+#
+# CONFIG_MARVELL_PHY is not set
+# CONFIG_DAVICOM_PHY is not set
+# CONFIG_QSEMI_PHY is not set
+# CONFIG_LXT_PHY is not set
+# CONFIG_CICADA_PHY is not set
+# CONFIG_VITESSE_PHY is not set
+CONFIG_SMSC_PHY=y
+# CONFIG_BROADCOM_PHY is not set
+# CONFIG_ICPLUS_PHY is not set
+# CONFIG_REALTEK_PHY is not set
+# CONFIG_NATIONAL_PHY is not set
+# CONFIG_STE10XP is not set
+# CONFIG_LSI_ET1011C_PHY is not set
+# CONFIG_FIXED_PHY is not set
+# CONFIG_MDIO_BITBANG is not set
 CONFIG_NET_ETHERNET=y
 CONFIG_MII=y
 # CONFIG_AX88796 is not set
 # CONFIG_SMC91X is not set
 # CONFIG_DM9000 is not set
 # CONFIG_ENC28J60 is not set
-CONFIG_SMC911X=y
+# CONFIG_SMC911X is not set
+CONFIG_SMSC911X=y
 # CONFIG_IBM_NEW_EMAC_ZMII is not set
 # CONFIG_IBM_NEW_EMAC_RGMII is not set
 # CONFIG_IBM_NEW_EMAC_TAH is not set
diff --git a/arch/arm/configs/pcm037_defconfig b/arch/arm/configs/pcm037_defconfig
index 6274745..6e37c77 100644
--- a/arch/arm/configs/pcm037_defconfig
+++ b/arch/arm/configs/pcm037_defconfig
@@ -465,12 +465,33 @@ CONFIG_NETDEVICES=y
 # CONFIG_EQUALIZER is not set
 # CONFIG_TUN is not set
 # CONFIG_VETH is not set
-# CONFIG_PHYLIB is not set
+CONFIG_PHYLIB=y
+
+#
+# MII PHY device drivers
+#
+# CONFIG_MARVELL_PHY is not set
+# CONFIG_DAVICOM_PHY is not set
+# CONFIG_QSEMI_PHY is not set
+# CONFIG_LXT_PHY is not set
+# CONFIG_CICADA_PHY is not set
+# CONFIG_VITESSE_PHY is not set
+CONFIG_SMSC_PHY=y
+# CONFIG_BROADCOM_PHY is not set
+# CONFIG_ICPLUS_PHY is not set
+# CONFIG_REALTEK_PHY is not set
+# CONFIG_NATIONAL_PHY is not set
+# CONFIG_STE10XP is not set
+# CONFIG_LSI_ET1011C_PHY is not set
+# CONFIG_FIXED_PHY is not set
+# CONFIG_MDIO_BITBANG is not set
 CONFIG_NET_ETHERNET=y
 CONFIG_MII=y
 # CONFIG_AX88796 is not set
 CONFIG_SMC91X=y
 # CONFIG_DM9000 is not set
+# CONFIG_SMC911X is not set
+CONFIG_SMSC911X=y
 # CONFIG_IBM_NEW_EMAC_ZMII is not set
 # CONFIG_IBM_NEW_EMAC_RGMII is not set
 # CONFIG_IBM_NEW_EMAC_TAH is not set
diff --git a/arch/arm/configs/realview-smp_defconfig b/arch/arm/configs/realview-smp_defconfig
index cd29824..21db4b3 100644
--- a/arch/arm/configs/realview-smp_defconfig
+++ b/arch/arm/configs/realview-smp_defconfig
@@ -496,13 +496,33 @@ CONFIG_NETDEVICES=y
 # CONFIG_EQUALIZER is not set
 # CONFIG_TUN is not set
 # CONFIG_VETH is not set
-# CONFIG_PHYLIB is not set
+CONFIG_PHYLIB=y
+
+#
+# MII PHY device drivers
+#
+# CONFIG_MARVELL_PHY is not set
+# CONFIG_DAVICOM_PHY is not set
+# CONFIG_QSEMI_PHY is not set
+# CONFIG_LXT_PHY is not set
+# CONFIG_CICADA_PHY is not set
+# CONFIG_VITESSE_PHY is not set
+CONFIG_SMSC_PHY=y
+# CONFIG_BROADCOM_PHY is not set
+# CONFIG_ICPLUS_PHY is not set
+# CONFIG_REALTEK_PHY is not set
+# CONFIG_NATIONAL_PHY is not set
+# CONFIG_STE10XP is not set
+# CONFIG_LSI_ET1011C_PHY is not set
+# CONFIG_FIXED_PHY is not set
+# CONFIG_MDIO_BITBANG is not set
 CONFIG_NET_ETHERNET=y
 CONFIG_MII=y
 # CONFIG_AX88796 is not set
 CONFIG_SMC91X=y
 # CONFIG_DM9000 is not set
-CONFIG_SMC911X=y
+# CONFIG_SMC911X is not set
+CONFIG_SMSC911X=y
 # CONFIG_IBM_NEW_EMAC_ZMII is not set
 # CONFIG_IBM_NEW_EMAC_RGMII is not set
 # CONFIG_IBM_NEW_EMAC_TAH is not set
diff --git a/arch/arm/configs/realview_defconfig b/arch/arm/configs/realview_defconfig
index 7e253f5..9a75c30 100644
--- a/arch/arm/configs/realview_defconfig
+++ b/arch/arm/configs/realview_defconfig
@@ -490,13 +490,33 @@ CONFIG_NETDEVICES=y
 # CONFIG_EQUALIZER is not set
 # CONFIG_TUN is not set
 # CONFIG_VETH is not set
-# CONFIG_PHYLIB is not set
+CONFIG_PHYLIB=y
+
+#
+# MII PHY device drivers
+#
+# CONFIG_MARVELL_PHY is not set
+# CONFIG_DAVICOM_PHY is not set
+# CONFIG_QSEMI_PHY is not set
+# CONFIG_LXT_PHY is not set
+# CONFIG_CICADA_PHY is not set
+# CONFIG_VITESSE_PHY is not set
+CONFIG_SMSC_PHY=y
+# CONFIG_BROADCOM_PHY is not set
+# CONFIG_ICPLUS_PHY is not set
+# CONFIG_REALTEK_PHY is not set
+# CONFIG_NATIONAL_PHY is not set
+# CONFIG_STE10XP is not set
+# CONFIG_LSI_ET1011C_PHY is not set
+# CONFIG_FIXED_PHY is not set
+# CONFIG_MDIO_BITBANG is not set
 CONFIG_NET_ETHERNET=y
 CONFIG_MII=y
 # CONFIG_AX88796 is not set
 CONFIG_SMC91X=y
 # CONFIG_DM9000 is not set
-CONFIG_SMC911X=y
+# CONFIG_SMC911X is not set
+CONFIG_SMSC911X=y
 # CONFIG_IBM_NEW_EMAC_ZMII is not set
 # CONFIG_IBM_NEW_EMAC_RGMII is not set
 # CONFIG_IBM_NEW_EMAC_TAH is not set
diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 7ac812d..e26c4fe 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -198,17 +198,17 @@ static int at91_pm_verify_clocks(void)
 	/* USB must not be using PLLB */
 	if (cpu_is_at91rm9200()) {
 		if ((scsr & (AT91RM9200_PMC_UHP | AT91RM9200_PMC_UDP)) != 0) {
-			pr_debug("AT91: PM - Suspend-to-RAM with USB still active\n");
+			pr_err("AT91: PM - Suspend-to-RAM with USB still active\n");
 			return 0;
 		}
 	} else if (cpu_is_at91sam9260() || cpu_is_at91sam9261() || cpu_is_at91sam9263() || cpu_is_at91sam9g20()) {
 		if ((scsr & (AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP)) != 0) {
-			pr_debug("AT91: PM - Suspend-to-RAM with USB still active\n");
+			pr_err("AT91: PM - Suspend-to-RAM with USB still active\n");
 			return 0;
 		}
 	} else if (cpu_is_at91cap9()) {
 		if ((scsr & AT91CAP9_PMC_UHP) != 0) {
-			pr_debug("AT91: PM - Suspend-to-RAM with USB still active\n");
+			pr_err("AT91: PM - Suspend-to-RAM with USB still active\n");
 			return 0;
 		}
 	}
@@ -223,7 +223,7 @@ static int at91_pm_verify_clocks(void)
 
 		css = at91_sys_read(AT91_PMC_PCKR(i)) & AT91_PMC_CSS;
 		if (css != AT91_PMC_CSS_SLOW) {
-			pr_debug("AT91: PM - Suspend-to-RAM with PCK%d src %d\n", i, css);
+			pr_err("AT91: PM - Suspend-to-RAM with PCK%d src %d\n", i, css);
 			return 0;
 		}
 	}
diff --git a/arch/arm/mach-gemini/include/mach/system.h b/arch/arm/mach-gemini/include/mach/system.h
index bbbd727..4d9c1f8 100644
--- a/arch/arm/mach-gemini/include/mach/system.h
+++ b/arch/arm/mach-gemini/include/mach/system.h
@@ -28,7 +28,7 @@ static inline void arch_idle(void)
 	cpu_do_idle();
 }
 
-static inline void arch_reset(char mode)
+static inline void arch_reset(char mode, const char *cmd)
 {
 	__raw_writel(RESET_GLOBAL | RESET_CPU1,
 		     IO_ADDRESS(GEMINI_GLOBAL_BASE) + GLOBAL_RESET);
diff --git a/arch/arm/mach-mmp/include/mach/system.h b/arch/arm/mach-mmp/include/mach/system.h
index 001edfe..4f5b0e0 100644
--- a/arch/arm/mach-mmp/include/mach/system.h
+++ b/arch/arm/mach-mmp/include/mach/system.h
@@ -14,7 +14,7 @@ static inline void arch_idle(void)
 	cpu_do_idle();
 }
 
-static inline void arch_reset(char mode)
+static inline void arch_reset(char mode, const char *cmd)
 {
 	cpu_reset(0);
 }
diff --git a/arch/arm/mach-mx3/pcm037.c b/arch/arm/mach-mx3/pcm037.c
index 5fce022..c3648ef 100644
--- a/arch/arm/mach-mx3/pcm037.c
+++ b/arch/arm/mach-mx3/pcm037.c
@@ -24,7 +24,7 @@
 #include <linux/mtd/plat-ram.h>
 #include <linux/memory.h>
 #include <linux/gpio.h>
-#include <linux/smc911x.h>
+#include <linux/smsc911x.h>
 #include <linux/interrupt.h>
 #include <linux/i2c.h>
 #include <linux/i2c/at24.h>
@@ -70,7 +70,7 @@ static struct imxuart_platform_data uart_pdata = {
 	.flags = IMXUART_HAVE_RTSCTS,
 };
 
-static struct resource smc911x_resources[] = {
+static struct resource smsc911x_resources[] = {
 	[0] = {
 		.start		= CS1_BASE_ADDR + 0x300,
 		.end		= CS1_BASE_ADDR + 0x300 + SZ_64K - 1,
@@ -79,22 +79,25 @@ static struct resource smc911x_resources[] = {
 	[1] = {
 		.start		= IOMUX_TO_IRQ(MX31_PIN_GPIO3_1),
 		.end		= IOMUX_TO_IRQ(MX31_PIN_GPIO3_1),
-		.flags		= IORESOURCE_IRQ,
+		.flags		= IORESOURCE_IRQ | IORESOURCE_IRQ_LOWLEVEL,
 	},
 };
 
-static struct smc911x_platdata smc911x_info = {
-	.flags		= SMC911X_USE_32BIT,
-	.irq_flags	= IRQF_SHARED | IRQF_TRIGGER_LOW,
+static struct smsc911x_platform_config smsc911x_info = {
+	.flags		= SMSC911X_USE_32BIT | SMSC911X_FORCE_INTERNAL_PHY |
+			  SMSC911X_SAVE_MAC_ADDRESS,
+	.irq_polarity	= SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
+	.irq_type	= SMSC911X_IRQ_TYPE_OPEN_DRAIN,
+	.phy_interface	= PHY_INTERFACE_MODE_MII,
 };
 
 static struct platform_device pcm037_eth = {
-	.name		= "smc911x",
+	.name		= "smsc911x",
 	.id		= -1,
-	.num_resources	= ARRAY_SIZE(smc911x_resources),
-	.resource	= smc911x_resources,
+	.num_resources	= ARRAY_SIZE(smsc911x_resources),
+	.resource	= smsc911x_resources,
 	.dev		= {
-		.platform_data = &smc911x_info,
+		.platform_data = &smsc911x_info,
 	},
 };
 
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index a2c3fcc..c49d9bf 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -47,6 +47,8 @@ obj-$(CONFIG_MACH_OMAP_3430SDP)		+= board-3430sdp.o \
 
 obj-$(CONFIG_MACH_NOKIA_RX51)		+= board-rx51.o \
 					   board-rx51-peripherals.o \
+					   mmc-twl4030.o
+
 # Platform specific device init code
 ifeq ($(CONFIG_USB_MUSB_SOC),y)
 obj-y					+= usb-musb.o
diff --git a/arch/arm/mach-omap2/board-ldp.c b/arch/arm/mach-omap2/board-ldp.c
index e096f77..da57b0f 100644
--- a/arch/arm/mach-omap2/board-ldp.c
+++ b/arch/arm/mach-omap2/board-ldp.c
@@ -23,6 +23,7 @@
 #include <linux/spi/ads7846.h>
 #include <linux/i2c/twl4030.h>
 #include <linux/io.h>
+#include <linux/smsc911x.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
@@ -41,12 +42,12 @@
 
 #include "mmc-twl4030.h"
 
-#define LDP_SMC911X_CS		1
-#define LDP_SMC911X_GPIO	152
+#define LDP_SMSC911X_CS		1
+#define LDP_SMSC911X_GPIO	152
 #define DEBUG_BASE		0x08000000
 #define LDP_ETHR_START		DEBUG_BASE
 
-static struct resource ldp_smc911x_resources[] = {
+static struct resource ldp_smsc911x_resources[] = {
 	[0] = {
 		.start	= LDP_ETHR_START,
 		.end	= LDP_ETHR_START + SZ_4K,
@@ -59,40 +60,50 @@ static struct resource ldp_smc911x_resources[] = {
 	},
 };
 
-static struct platform_device ldp_smc911x_device = {
-	.name		= "smc911x",
+static struct smsc911x_platform_config ldp_smsc911x_config = {
+	.irq_polarity	= SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
+	.irq_type	= SMSC911X_IRQ_TYPE_OPEN_DRAIN,
+	.flags		= SMSC911X_USE_32BIT,
+	.phy_interface	= PHY_INTERFACE_MODE_MII,
+};
+
+static struct platform_device ldp_smsc911x_device = {
+	.name		= "smsc911x",
 	.id		= -1,
-	.num_resources	= ARRAY_SIZE(ldp_smc911x_resources),
-	.resource	= ldp_smc911x_resources,
+	.num_resources	= ARRAY_SIZE(ldp_smsc911x_resources),
+	.resource	= ldp_smsc911x_resources,
+	.dev		= {
+		.platform_data = &ldp_smsc911x_config,
+	},
 };
 
 static struct platform_device *ldp_devices[] __initdata = {
-	&ldp_smc911x_device,
+	&ldp_smsc911x_device,
 };
 
-static inline void __init ldp_init_smc911x(void)
+static inline void __init ldp_init_smsc911x(void)
 {
 	int eth_cs;
 	unsigned long cs_mem_base;
 	int eth_gpio = 0;
 
-	eth_cs = LDP_SMC911X_CS;
+	eth_cs = LDP_SMSC911X_CS;
 
 	if (gpmc_cs_request(eth_cs, SZ_16M, &cs_mem_base) < 0) {
-		printk(KERN_ERR "Failed to request GPMC mem for smc911x\n");
+		printk(KERN_ERR "Failed to request GPMC mem for smsc911x\n");
 		return;
 	}
 
-	ldp_smc911x_resources[0].start = cs_mem_base + 0x0;
-	ldp_smc911x_resources[0].end   = cs_mem_base + 0xff;
+	ldp_smsc911x_resources[0].start = cs_mem_base + 0x0;
+	ldp_smsc911x_resources[0].end   = cs_mem_base + 0xff;
 	udelay(100);
 
-	eth_gpio = LDP_SMC911X_GPIO;
+	eth_gpio = LDP_SMSC911X_GPIO;
 
-	ldp_smc911x_resources[1].start = OMAP_GPIO_IRQ(eth_gpio);
+	ldp_smsc911x_resources[1].start = OMAP_GPIO_IRQ(eth_gpio);
 
-	if (gpio_request(eth_gpio, "smc911x irq") < 0) {
-		printk(KERN_ERR "Failed to request GPIO%d for smc911x IRQ\n",
+	if (gpio_request(eth_gpio, "smsc911x irq") < 0) {
+		printk(KERN_ERR "Failed to request GPIO%d for smsc911x IRQ\n",
 				eth_gpio);
 		return;
 	}
@@ -104,7 +115,7 @@ static void __init omap_ldp_init_irq(void)
 	omap2_init_common_hw(NULL);
 	omap_init_irq();
 	omap_gpio_init();
-	ldp_init_smc911x();
+	ldp_init_smsc911x();
 }
 
 static struct omap_uart_config ldp_uart_config __initdata = {
diff --git a/arch/arm/mach-omap2/board-overo.c b/arch/arm/mach-omap2/board-overo.c
index b3f6e9d..b1f23be 100644
--- a/arch/arm/mach-omap2/board-overo.c
+++ b/arch/arm/mach-omap2/board-overo.c
@@ -57,6 +57,9 @@
 #define GPMC_CS0_BASE  0x60
 #define GPMC_CS_SIZE   0x30
 
+#define OVERO_SMSC911X_CS      5
+#define OVERO_SMSC911X_GPIO    176
+
 #if defined(CONFIG_TOUCHSCREEN_ADS7846) || \
 	defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE)
 
@@ -116,6 +119,67 @@ static void __init overo_ads7846_init(void)
 static inline void __init overo_ads7846_init(void) { return; }
 #endif
 
+#if defined(CONFIG_SMSC911X) || defined(CONFIG_SMSC911X_MODULE)
+
+#include <linux/smsc911x.h>
+
+static struct resource overo_smsc911x_resources[] = {
+	{
+		.name	= "smsc911x-memory",
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.flags	= IORESOURCE_IRQ | IORESOURCE_IRQ_LOWLEVEL,
+	},
+};
+
+static struct smsc911x_platform_config overo_smsc911x_config = {
+	.irq_polarity	= SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
+	.irq_type	= SMSC911X_IRQ_TYPE_OPEN_DRAIN,
+	.flags		= SMSC911X_USE_32BIT ,
+	.phy_interface	= PHY_INTERFACE_MODE_MII,
+};
+
+static struct platform_device overo_smsc911x_device = {
+	.name		= "smsc911x",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(overo_smsc911x_resources),
+	.resource	= overo_smsc911x_resources,	/* array, not &array */
+	.dev		= {
+		.platform_data = &overo_smsc911x_config,
+	},
+};
+
+static inline void __init overo_init_smsc911x(void)
+{
+	unsigned long cs_mem_base;
+
+	if (gpmc_cs_request(OVERO_SMSC911X_CS, SZ_16M, &cs_mem_base) < 0) {
+		printk(KERN_ERR "Failed request for GPMC mem for smsc911x\n");
+		return;
+	}
+
+	overo_smsc911x_resources[0].start = cs_mem_base + 0x0;
+	overo_smsc911x_resources[0].end   = cs_mem_base + 0xff;
+
+	if ((gpio_request(OVERO_SMSC911X_GPIO, "SMSC911X IRQ") == 0) &&
+	    (gpio_direction_input(OVERO_SMSC911X_GPIO) == 0)) {
+		gpio_export(OVERO_SMSC911X_GPIO, 0);
+	} else {
+		printk(KERN_ERR "could not obtain gpio for SMSC911X IRQ\n");
+		return;
+	}
+
+	overo_smsc911x_resources[1].start = OMAP_GPIO_IRQ(OVERO_SMSC911X_GPIO);
+	overo_smsc911x_resources[1].end	  = 0;
+
+	platform_device_register(&overo_smsc911x_device);
+}
+
+#else
+static inline void __init overo_init_smsc911x(void) { return; }
+#endif
+
 static struct mtd_partition overo_nand_partitions[] = {
 	{
 		.name           = "xloader",
@@ -290,6 +354,7 @@ static void __init overo_init(void)
 	overo_flash_init();
 	usb_musb_init();
 	overo_ads7846_init();
+	overo_init_smsc911x();
 
 	if ((gpio_request(OVERO_GPIO_W2W_NRESET,
 			  "OVERO_GPIO_W2W_NRESET") == 0) &&
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index d676668..9ab947c 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -28,7 +28,7 @@
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
 #include <linux/io.h>
-#include <linux/smc911x.h>
+#include <linux/smsc911x.h>
 #include <linux/ata_platform.h>
 
 #include <asm/clkdev.h>
@@ -128,14 +128,15 @@ int realview_flash_register(struct resource *res, u32 num)
 	return platform_device_register(&realview_flash_device);
 }
 
-static struct smc911x_platdata realview_smc911x_platdata = {
-	.flags		= SMC911X_USE_32BIT,
-	.irq_flags	= IRQF_SHARED,
-	.irq_polarity	= 1,
+static struct smsc911x_platform_config smsc911x_config = {
+	.flags		= SMSC911X_USE_32BIT,
+	.irq_polarity	= SMSC911X_IRQ_POLARITY_ACTIVE_HIGH,
+	.irq_type	= SMSC911X_IRQ_TYPE_PUSH_PULL,
+	.phy_interface	= PHY_INTERFACE_MODE_MII,
 };
 
 static struct platform_device realview_eth_device = {
-	.name		= "smc911x",
+	.name		= "smsc911x",
 	.id		= 0,
 	.num_resources	= 2,
 };
@@ -145,8 +146,8 @@ int realview_eth_register(const char *name, struct resource *res)
 	if (name)
 		realview_eth_device.name = name;
 	realview_eth_device.resource = res;
-	if (strcmp(realview_eth_device.name, "smc911x") == 0)
-		realview_eth_device.dev.platform_data = &realview_smc911x_platdata;
+	if (strcmp(realview_eth_device.name, "smsc911x") == 0)
+		realview_eth_device.dev.platform_data = &smsc911x_config;
 
 	return platform_device_register(&realview_eth_device);
 }
diff --git a/arch/arm/mach-realview/localtimer.c b/arch/arm/mach-realview/localtimer.c
index 67d6d9c..d0d39ad 100644
--- a/arch/arm/mach-realview/localtimer.c
+++ b/arch/arm/mach-realview/localtimer.c
@@ -191,6 +191,7 @@ void __cpuinit local_timer_setup(void)
 	clk->name		= "dummy_timer";
 	clk->features		= CLOCK_EVT_FEAT_DUMMY;
 	clk->rating		= 200;
+	clk->mult               = 1;
 	clk->set_mode		= dummy_timer_set_mode;
 	clk->broadcast		= smp_timer_broadcast;
 	clk->cpumask		= cpumask_of(cpu);
diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S
index 94077fb..6f7e709 100644
--- a/arch/arm/mm/abort-ev6.S
+++ b/arch/arm/mm/abort-ev6.S
@@ -29,10 +29,10 @@ ENTRY(v6_early_abort)
 	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
 	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
 /*
- * Faulty SWP instruction on 1136 doesn't set bit 11 in DFSR.
+ * Faulty SWP instruction on 1136 doesn't set bit 11 in DFSR (erratum 326103).
  * The test below covers all the write situations, including Java bytecodes
  */
-	bic	r1, r1, #1 << 11 | 1 << 10	@ clear bits 11 and 10 of FSR
+	bic	r1, r1, #1 << 11		@ clear bit 11 of FSR
 	tst	r3, #PSR_J_BIT			@ Java?
 	movne	pc, lr
 	do_thumb_abort
diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
index d6dd838..6e77c04 100644
--- a/arch/arm/mm/cache-feroceon-l2.c
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -115,6 +115,10 @@ static inline void l2_inv_pa_range(unsigned long start, unsigned long end)
 	raw_local_irq_restore(flags);
 }
 
+static inline void l2_inv_all(void)
+{
+	__asm__("mcr p15, 1, %0, c15, c11, 0" : : "r" (0));
+}
 
 /*
  * Linux primitives.
@@ -254,9 +258,7 @@ static void __init enable_dcache(void)
 
 static void __init __invalidate_icache(void)
 {
-	int dummy;
-
-	__asm__ __volatile__("mcr p15, 0, %0, c7, c5, 0" : "=r" (dummy));
+	__asm__("mcr p15, 0, %0, c7, c5, 0" : : "r" (0));
 }
 
 static int __init invalidate_and_disable_icache(void)
@@ -321,6 +323,7 @@ static void __init enable_l2(void)
 
 		d = flush_and_disable_dcache();
 		i = invalidate_and_disable_icache();
+		l2_inv_all();
 		write_extra_features(u | 0x00400000);
 		if (i)
 			enable_icache();
diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S
index ba592a9..a2bed62 100644
--- a/arch/arm/vfp/entry.S
+++ b/arch/arm/vfp/entry.S
@@ -15,13 +15,16 @@
  *  r10 = thread_info structure
  *  lr  = failure return
  */
-#include <linux/linkage.h>
-#include <linux/init.h>
-#include <asm/asm-offsets.h>
-#include <asm/assembler.h>
+#include <asm/thread_info.h>
 #include <asm/vfpmacros.h>
+#include "../kernel/entry-header.S"
 
 ENTRY(do_vfp)
+#ifdef CONFIG_PREEMPT
+	ldr	r4, [r10, #TI_PREEMPT]	@ get preempt count
+	add	r11, r4, #1		@ increment it
+	str	r11, [r10, #TI_PREEMPT]
+#endif
 	enable_irq
  	ldr	r4, .LCvfp
 	ldr	r11, [r10, #TI_CPU]	@ CPU number
@@ -30,6 +33,12 @@ ENTRY(do_vfp)
 ENDPROC(do_vfp)
 
 ENTRY(vfp_null_entry)
+#ifdef CONFIG_PREEMPT
+	get_thread_info	r10
+	ldr	r4, [r10, #TI_PREEMPT]	@ get preempt count
+	sub	r11, r4, #1		@ decrement it
+	str	r11, [r10, #TI_PREEMPT]
+#endif
 	mov	pc, lr
 ENDPROC(vfp_null_entry)
 
@@ -41,6 +50,12 @@ ENDPROC(vfp_null_entry)
 
 	__INIT
 ENTRY(vfp_testing_entry)
+#ifdef CONFIG_PREEMPT
+	get_thread_info	r10
+	ldr	r4, [r10, #TI_PREEMPT]	@ get preempt count
+	sub	r11, r4, #1		@ decrement it
+	str	r11, [r10, #TI_PREEMPT]
+#endif
 	ldr	r0, VFP_arch_address
 	str	r5, [r0]		@ known non-zero value
 	mov	pc, r9			@ we have handled the fault
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index a5a4e57..83c4e38 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -137,6 +137,12 @@ check_for_exception:
 	VFPFMXR	FPEXC, r1		@ restore FPEXC last
 	sub	r2, r2, #4
 	str	r2, [sp, #S_PC]		@ retry the instruction
+#ifdef CONFIG_PREEMPT
+	get_thread_info	r10
+	ldr	r4, [r10, #TI_PREEMPT]	@ get preempt count
+	sub	r11, r4, #1		@ decrement it
+	str	r11, [r10, #TI_PREEMPT]
+#endif
 	mov	pc, r9			@ we think we have handled things
 
 
@@ -155,6 +161,12 @@ look_for_VFP_exceptions:
 	@ not recognised by VFP
 
 	DBGSTR	"not VFP"
+#ifdef CONFIG_PREEMPT
+	get_thread_info	r10
+	ldr	r4, [r10, #TI_PREEMPT]	@ get preempt count
+	sub	r11, r4, #1		@ decrement it
+	str	r11, [r10, #TI_PREEMPT]
+#endif
 	mov	pc, lr
 
 process_exception:
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 75457b3..01599c4 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -266,7 +266,7 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 		 * on VFP subarch 1.
 		 */
 		 vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs);
-		 return;
+		goto exit;
 	}
 
 	/*
@@ -297,7 +297,7 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 	 * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1.
 	 */
 	if (fpexc ^ (FPEXC_EX | FPEXC_FP2V))
-		return;
+		goto exit;
 
 	/*
 	 * The barrier() here prevents fpinst2 being read
@@ -310,6 +310,8 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 	exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs);
 	if (exceptions)
 		vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
+ exit:
+	preempt_enable();
 }
 
 static void vfp_enable(void *unused)
diff --git a/arch/m68k/include/asm/bootinfo.h b/arch/m68k/include/asm/bootinfo.h
index fedf3e3..fb8a06b 100644
--- a/arch/m68k/include/asm/bootinfo.h
+++ b/arch/m68k/include/asm/bootinfo.h
@@ -1,5 +1,378 @@
-#ifdef __uClinux__
-#include "bootinfo_no.h"
-#else
-#include "bootinfo_mm.h"
+/*
+** asm/bootinfo.h -- Definition of the Linux/m68k boot information structure
+**
+** Copyright 1992 by Greg Harp
+**
+** This file is subject to the terms and conditions of the GNU General Public
+** License.  See the file COPYING in the main directory of this archive
+** for more details.
+**
+** Created 09/29/92 by Greg Harp
+**
+** 5/2/94 Roman Hodek:
+**   Added bi_atari part of the machine dependent union bi_un; for now it
+**   contains just a model field to distinguish between TT and Falcon.
+** 26/7/96 Roman Zippel:
+**   Renamed to setup.h; added some useful macros to allow gcc some
+**   optimizations if possible.
+** 5/10/96 Geert Uytterhoeven:
+**   Redesign of the boot information structure; renamed to bootinfo.h again
+** 27/11/96 Geert Uytterhoeven:
+**   Backwards compatibility with bootinfo interface version 1.0
+*/
+
+#ifndef _M68K_BOOTINFO_H
+#define _M68K_BOOTINFO_H
+
+
+    /*
+     *  Bootinfo definitions
+     *
+     *  This is an easily parsable and extendable structure containing all
+     *  information to be passed from the bootstrap to the kernel.
+     *
+     *  This way I hope to keep all future changes backwards/forwards compatible.
+     *  Thus, keep your fingers crossed...
+     *
+     *  This structure is copied right after the kernel bss by the bootstrap
+     *  routine.
+     */
+
+#ifndef __ASSEMBLY__
+
+struct bi_record {
+    unsigned short tag;			/* tag ID */
+    unsigned short size;		/* size of record (in bytes) */
+    unsigned long data[0];		/* data */
+};
+
+#endif /* __ASSEMBLY__ */
+
+
+    /*
+     *  Tag Definitions
+     *
+     *  Machine independent tags start counting from 0x0000
+     *  Machine dependent tags start counting from 0x8000
+     */
+
+#define BI_LAST			0x0000	/* last record (sentinel) */
+#define BI_MACHTYPE		0x0001	/* machine type (u_long) */
+#define BI_CPUTYPE		0x0002	/* cpu type (u_long) */
+#define BI_FPUTYPE		0x0003	/* fpu type (u_long) */
+#define BI_MMUTYPE		0x0004	/* mmu type (u_long) */
+#define BI_MEMCHUNK		0x0005	/* memory chunk address and size */
+					/* (struct mem_info) */
+#define BI_RAMDISK		0x0006	/* ramdisk address and size */
+					/* (struct mem_info) */
+#define BI_COMMAND_LINE		0x0007	/* kernel command line parameters */
+					/* (string) */
+
+    /*
+     *  Amiga-specific tags
+     */
+
+#define BI_AMIGA_MODEL		0x8000	/* model (u_long) */
+#define BI_AMIGA_AUTOCON	0x8001	/* AutoConfig device */
+					/* (struct ConfigDev) */
+#define BI_AMIGA_CHIP_SIZE	0x8002	/* size of Chip RAM (u_long) */
+#define BI_AMIGA_VBLANK		0x8003	/* VBLANK frequency (u_char) */
+#define BI_AMIGA_PSFREQ		0x8004	/* power supply frequency (u_char) */
+#define BI_AMIGA_ECLOCK		0x8005	/* EClock frequency (u_long) */
+#define BI_AMIGA_CHIPSET	0x8006	/* native chipset present (u_long) */
+#define BI_AMIGA_SERPER		0x8007	/* serial port period (u_short) */
+
+    /*
+     *  Atari-specific tags
+     */
+
+#define BI_ATARI_MCH_COOKIE	0x8000	/* _MCH cookie from TOS (u_long) */
+#define BI_ATARI_MCH_TYPE	0x8001	/* special machine type (u_long) */
+					/* (values are ATARI_MACH_* defines) */
+
+/* mch_cookie values (upper word) */
+#define ATARI_MCH_ST		0
+#define ATARI_MCH_STE		1
+#define ATARI_MCH_TT		2
+#define ATARI_MCH_FALCON	3
+
+/* mch_type values */
+#define ATARI_MACH_NORMAL	0	/* no special machine type */
+#define ATARI_MACH_MEDUSA	1	/* Medusa 040 */
+#define ATARI_MACH_HADES	2	/* Hades 040 or 060 */
+#define ATARI_MACH_AB40		3	/* Afterburner040 on Falcon */
+
+    /*
+     *  VME-specific tags
+     */
+
+#define BI_VME_TYPE		0x8000	/* VME sub-architecture (u_long) */
+#define BI_VME_BRDINFO		0x8001	/* VME board information (struct) */
+
+/* BI_VME_TYPE codes */
+#define	VME_TYPE_TP34V		0x0034	/* Tadpole TP34V */
+#define VME_TYPE_MVME147	0x0147	/* Motorola MVME147 */
+#define VME_TYPE_MVME162	0x0162	/* Motorola MVME162 */
+#define VME_TYPE_MVME166	0x0166	/* Motorola MVME166 */
+#define VME_TYPE_MVME167	0x0167	/* Motorola MVME167 */
+#define VME_TYPE_MVME172	0x0172	/* Motorola MVME172 */
+#define VME_TYPE_MVME177	0x0177	/* Motorola MVME177 */
+#define VME_TYPE_BVME4000	0x4000	/* BVM Ltd. BVME4000 */
+#define VME_TYPE_BVME6000	0x6000	/* BVM Ltd. BVME6000 */
+
+/* BI_VME_BRDINFO is a 32 byte struct as returned by the Bug code on
+ * Motorola VME boards.  Contains board number, Bug version, board
+ * configuration options, etc.  See include/asm/mvme16xhw.h for details.
+ */
+
+
+    /*
+     *  Macintosh-specific tags (all u_long)
+     */
+
+#define BI_MAC_MODEL		0x8000	/* Mac Gestalt ID (model type) */
+#define BI_MAC_VADDR		0x8001	/* Mac video base address */
+#define BI_MAC_VDEPTH		0x8002	/* Mac video depth */
+#define BI_MAC_VROW		0x8003	/* Mac video rowbytes */
+#define BI_MAC_VDIM		0x8004	/* Mac video dimensions */
+#define BI_MAC_VLOGICAL		0x8005	/* Mac video logical base */
+#define BI_MAC_SCCBASE		0x8006	/* Mac SCC base address */
+#define BI_MAC_BTIME		0x8007	/* Mac boot time */
+#define BI_MAC_GMTBIAS		0x8008	/* Mac GMT timezone offset */
+#define BI_MAC_MEMSIZE		0x8009	/* Mac RAM size (sanity check) */
+#define BI_MAC_CPUID		0x800a	/* Mac CPU type (sanity check) */
+#define BI_MAC_ROMBASE		0x800b	/* Mac system ROM base address */
+
+    /*
+     *  Macintosh hardware profile data - unused, see macintosh.h for
+     *  reasonable type values
+     */
+
+#define BI_MAC_VIA1BASE		0x8010	/* Mac VIA1 base address (always present) */
+#define BI_MAC_VIA2BASE		0x8011	/* Mac VIA2 base address (type varies) */
+#define BI_MAC_VIA2TYPE		0x8012	/* Mac VIA2 type (VIA, RBV, OSS) */
+#define BI_MAC_ADBTYPE		0x8013	/* Mac ADB interface type */
+#define BI_MAC_ASCBASE		0x8014	/* Mac Apple Sound Chip base address */
+#define BI_MAC_SCSI5380		0x8015	/* Mac NCR 5380 SCSI (base address, multi) */
+#define BI_MAC_SCSIDMA		0x8016	/* Mac SCSI DMA (base address) */
+#define BI_MAC_SCSI5396		0x8017	/* Mac NCR 53C96 SCSI (base address, multi) */
+#define BI_MAC_IDETYPE		0x8018	/* Mac IDE interface type */
+#define BI_MAC_IDEBASE		0x8019	/* Mac IDE interface base address */
+#define BI_MAC_NUBUS		0x801a	/* Mac Nubus type (none, regular, pseudo) */
+#define BI_MAC_SLOTMASK		0x801b	/* Mac Nubus slots present */
+#define BI_MAC_SCCTYPE		0x801c	/* Mac SCC serial type (normal, IOP) */
+#define BI_MAC_ETHTYPE		0x801d	/* Mac builtin ethernet type (Sonic, MACE) */
+#define BI_MAC_ETHBASE		0x801e	/* Mac builtin ethernet base address */
+#define BI_MAC_PMU		0x801f	/* Mac power management / poweroff hardware */
+#define BI_MAC_IOP_SWIM		0x8020	/* Mac SWIM floppy IOP */
+#define BI_MAC_IOP_ADB		0x8021	/* Mac ADB IOP */
+
+    /*
+     * Mac: compatibility with old booter data format (temporarily)
+     * Fields unused with the new bootinfo can be deleted now; instead of
+     * adding new fields the struct might be split into a hardware address
+     * part and a hardware type part
+     */
+
+#ifndef __ASSEMBLY__
+
+struct mac_booter_data
+{
+	unsigned long videoaddr;
+	unsigned long videorow;
+	unsigned long videodepth;
+	unsigned long dimensions;
+	unsigned long args;
+	unsigned long boottime;
+	unsigned long gmtbias;
+	unsigned long bootver;
+	unsigned long videological;
+	unsigned long sccbase;
+	unsigned long id;
+	unsigned long memsize;
+	unsigned long serialmf;
+	unsigned long serialhsk;
+	unsigned long serialgpi;
+	unsigned long printmf;
+	unsigned long printhsk;
+	unsigned long printgpi;
+	unsigned long cpuid;
+	unsigned long rombase;
+	unsigned long adbdelay;
+	unsigned long timedbra;
+};
+
+extern struct mac_booter_data
+	mac_bi_data;
+
 #endif
+
+    /*
+     *  Apollo-specific tags
+     */
+
+#define BI_APOLLO_MODEL         0x8000  /* model (u_long) */
+
+    /*
+     *  HP300-specific tags
+     */
+
+#define BI_HP300_MODEL		0x8000	/* model (u_long) */
+#define BI_HP300_UART_SCODE	0x8001	/* UART select code (u_long) */
+#define BI_HP300_UART_ADDR	0x8002	/* phys. addr of UART (u_long) */
+
+    /*
+     * Stuff for bootinfo interface versioning
+     *
+     * At the start of kernel code, a 'struct bootversion' is located.
+     * bootstrap checks for a matching version of the interface before booting
+     * a kernel, to avoid user confusion if kernel and bootstrap don't work
+     * together :-)
+     *
+     * If incompatible changes are made to the bootinfo interface, the major
+     * number below should be stepped (and the minor reset to 0) for the
+     * appropriate machine. If a change is backward-compatible, the minor
+     * should be stepped. "Backwards-compatible" means that booting will work,
+     * but certain features may not.
+     */
+
+#define BOOTINFOV_MAGIC			0x4249561A	/* 'BIV^Z' */
+#define MK_BI_VERSION(major,minor)	(((major)<<16)+(minor))
+#define BI_VERSION_MAJOR(v)		(((v) >> 16) & 0xffff)
+#define BI_VERSION_MINOR(v)		((v) & 0xffff)
+
+#ifndef __ASSEMBLY__
+
+struct bootversion {
+    unsigned short branch;
+    unsigned long magic;
+    struct {
+	unsigned long machtype;
+	unsigned long version;
+    } machversions[0];
+};
+
+#endif /* __ASSEMBLY__ */
+
+#define AMIGA_BOOTI_VERSION    MK_BI_VERSION( 2, 0 )
+#define ATARI_BOOTI_VERSION    MK_BI_VERSION( 2, 1 )
+#define MAC_BOOTI_VERSION      MK_BI_VERSION( 2, 0 )
+#define MVME147_BOOTI_VERSION  MK_BI_VERSION( 2, 0 )
+#define MVME16x_BOOTI_VERSION  MK_BI_VERSION( 2, 0 )
+#define BVME6000_BOOTI_VERSION MK_BI_VERSION( 2, 0 )
+#define Q40_BOOTI_VERSION      MK_BI_VERSION( 2, 0 )
+#define HP300_BOOTI_VERSION    MK_BI_VERSION( 2, 0 )
+
+#ifdef BOOTINFO_COMPAT_1_0
+
+    /*
+     *  Backwards compatibility with bootinfo interface version 1.0
+     */
+
+#define COMPAT_AMIGA_BOOTI_VERSION    MK_BI_VERSION( 1, 0 )
+#define COMPAT_ATARI_BOOTI_VERSION    MK_BI_VERSION( 1, 0 )
+#define COMPAT_MAC_BOOTI_VERSION      MK_BI_VERSION( 1, 0 )
+
+#include <linux/zorro.h>
+
+#define COMPAT_NUM_AUTO    16
+
+struct compat_bi_Amiga {
+    int model;
+    int num_autocon;
+    struct ConfigDev autocon[COMPAT_NUM_AUTO];
+    unsigned long chip_size;
+    unsigned char vblank;
+    unsigned char psfreq;
+    unsigned long eclock;
+    unsigned long chipset;
+    unsigned long hw_present;
+};
+
+struct compat_bi_Atari {
+    unsigned long hw_present;
+    unsigned long mch_cookie;
+};
+
+#ifndef __ASSEMBLY__
+
+struct compat_bi_Macintosh
+{
+	unsigned long videoaddr;
+	unsigned long videorow;
+	unsigned long videodepth;
+	unsigned long dimensions;
+	unsigned long args;
+	unsigned long boottime;
+	unsigned long gmtbias;
+	unsigned long bootver;
+	unsigned long videological;
+	unsigned long sccbase;
+	unsigned long id;
+	unsigned long memsize;
+	unsigned long serialmf;
+	unsigned long serialhsk;
+	unsigned long serialgpi;
+	unsigned long printmf;
+	unsigned long printhsk;
+	unsigned long printgpi;
+	unsigned long cpuid;
+	unsigned long rombase;
+	unsigned long adbdelay;
+	unsigned long timedbra;
+};
+
+#endif
+
+struct compat_mem_info {
+    unsigned long addr;
+    unsigned long size;
+};
+
+#define COMPAT_NUM_MEMINFO  4
+
+#define COMPAT_CPUB_68020 0
+#define COMPAT_CPUB_68030 1
+#define COMPAT_CPUB_68040 2
+#define COMPAT_CPUB_68060 3
+#define COMPAT_FPUB_68881 5
+#define COMPAT_FPUB_68882 6
+#define COMPAT_FPUB_68040 7
+#define COMPAT_FPUB_68060 8
+
+#define COMPAT_CPU_68020    (1<<COMPAT_CPUB_68020)
+#define COMPAT_CPU_68030    (1<<COMPAT_CPUB_68030)
+#define COMPAT_CPU_68040    (1<<COMPAT_CPUB_68040)
+#define COMPAT_CPU_68060    (1<<COMPAT_CPUB_68060)
+#define COMPAT_CPU_MASK     (31)
+#define COMPAT_FPU_68881    (1<<COMPAT_FPUB_68881)
+#define COMPAT_FPU_68882    (1<<COMPAT_FPUB_68882)
+#define COMPAT_FPU_68040    (1<<COMPAT_FPUB_68040)
+#define COMPAT_FPU_68060    (1<<COMPAT_FPUB_68060)
+#define COMPAT_FPU_MASK     (0xfe0)
+
+#define COMPAT_CL_SIZE      (256)
+
+struct compat_bootinfo {
+    unsigned long machtype;
+    unsigned long cputype;
+    struct compat_mem_info memory[COMPAT_NUM_MEMINFO];
+    int num_memory;
+    unsigned long ramdisk_size;
+    unsigned long ramdisk_addr;
+    char command_line[COMPAT_CL_SIZE];
+    union {
+	struct compat_bi_Amiga     bi_ami;
+	struct compat_bi_Atari     bi_ata;
+	struct compat_bi_Macintosh bi_mac;
+    } bi_un;
+};
+
+#define bi_amiga	bi_un.bi_ami
+#define bi_atari	bi_un.bi_ata
+#define bi_mac		bi_un.bi_mac
+
+#endif /* BOOTINFO_COMPAT_1_0 */
+
+
+#endif /* _M68K_BOOTINFO_H */
diff --git a/arch/m68k/include/asm/bootinfo_mm.h b/arch/m68k/include/asm/bootinfo_mm.h
deleted file mode 100644
index fb8a06b..0000000
--- a/arch/m68k/include/asm/bootinfo_mm.h
+++ /dev/null
@@ -1,378 +0,0 @@
-/*
-** asm/bootinfo.h -- Definition of the Linux/m68k boot information structure
-**
-** Copyright 1992 by Greg Harp
-**
-** This file is subject to the terms and conditions of the GNU General Public
-** License.  See the file COPYING in the main directory of this archive
-** for more details.
-**
-** Created 09/29/92 by Greg Harp
-**
-** 5/2/94 Roman Hodek:
-**   Added bi_atari part of the machine dependent union bi_un; for now it
-**   contains just a model field to distinguish between TT and Falcon.
-** 26/7/96 Roman Zippel:
-**   Renamed to setup.h; added some useful macros to allow gcc some
-**   optimizations if possible.
-** 5/10/96 Geert Uytterhoeven:
-**   Redesign of the boot information structure; renamed to bootinfo.h again
-** 27/11/96 Geert Uytterhoeven:
-**   Backwards compatibility with bootinfo interface version 1.0
-*/
-
-#ifndef _M68K_BOOTINFO_H
-#define _M68K_BOOTINFO_H
-
-
-    /*
-     *  Bootinfo definitions
-     *
-     *  This is an easily parsable and extendable structure containing all
-     *  information to be passed from the bootstrap to the kernel.
-     *
-     *  This way I hope to keep all future changes back/forewards compatible.
-     *  Thus, keep your fingers crossed...
-     *
-     *  This structure is copied right after the kernel bss by the bootstrap
-     *  routine.
-     */
-
-#ifndef __ASSEMBLY__
-
-struct bi_record {
-    unsigned short tag;			/* tag ID */
-    unsigned short size;		/* size of record (in bytes) */
-    unsigned long data[0];		/* data */
-};
-
-#endif /* __ASSEMBLY__ */
-
-
-    /*
-     *  Tag Definitions
-     *
-     *  Machine independent tags start counting from 0x0000
-     *  Machine dependent tags start counting from 0x8000
-     */
-
-#define BI_LAST			0x0000	/* last record (sentinel) */
-#define BI_MACHTYPE		0x0001	/* machine type (u_long) */
-#define BI_CPUTYPE		0x0002	/* cpu type (u_long) */
-#define BI_FPUTYPE		0x0003	/* fpu type (u_long) */
-#define BI_MMUTYPE		0x0004	/* mmu type (u_long) */
-#define BI_MEMCHUNK		0x0005	/* memory chunk address and size */
-					/* (struct mem_info) */
-#define BI_RAMDISK		0x0006	/* ramdisk address and size */
-					/* (struct mem_info) */
-#define BI_COMMAND_LINE		0x0007	/* kernel command line parameters */
-					/* (string) */
-
-    /*
-     *  Amiga-specific tags
-     */
-
-#define BI_AMIGA_MODEL		0x8000	/* model (u_long) */
-#define BI_AMIGA_AUTOCON	0x8001	/* AutoConfig device */
-					/* (struct ConfigDev) */
-#define BI_AMIGA_CHIP_SIZE	0x8002	/* size of Chip RAM (u_long) */
-#define BI_AMIGA_VBLANK		0x8003	/* VBLANK frequency (u_char) */
-#define BI_AMIGA_PSFREQ		0x8004	/* power supply frequency (u_char) */
-#define BI_AMIGA_ECLOCK		0x8005	/* EClock frequency (u_long) */
-#define BI_AMIGA_CHIPSET	0x8006	/* native chipset present (u_long) */
-#define BI_AMIGA_SERPER		0x8007	/* serial port period (u_short) */
-
-    /*
-     *  Atari-specific tags
-     */
-
-#define BI_ATARI_MCH_COOKIE	0x8000	/* _MCH cookie from TOS (u_long) */
-#define BI_ATARI_MCH_TYPE	0x8001	/* special machine type (u_long) */
-					/* (values are ATARI_MACH_* defines */
-
-/* mch_cookie values (upper word) */
-#define ATARI_MCH_ST		0
-#define ATARI_MCH_STE		1
-#define ATARI_MCH_TT		2
-#define ATARI_MCH_FALCON	3
-
-/* mch_type values */
-#define ATARI_MACH_NORMAL	0	/* no special machine type */
-#define ATARI_MACH_MEDUSA	1	/* Medusa 040 */
-#define ATARI_MACH_HADES	2	/* Hades 040 or 060 */
-#define ATARI_MACH_AB40		3	/* Afterburner040 on Falcon */
-
-    /*
-     *  VME-specific tags
-     */
-
-#define BI_VME_TYPE		0x8000	/* VME sub-architecture (u_long) */
-#define BI_VME_BRDINFO		0x8001	/* VME board information (struct) */
-
-/* BI_VME_TYPE codes */
-#define	VME_TYPE_TP34V		0x0034	/* Tadpole TP34V */
-#define VME_TYPE_MVME147	0x0147	/* Motorola MVME147 */
-#define VME_TYPE_MVME162	0x0162	/* Motorola MVME162 */
-#define VME_TYPE_MVME166	0x0166	/* Motorola MVME166 */
-#define VME_TYPE_MVME167	0x0167	/* Motorola MVME167 */
-#define VME_TYPE_MVME172	0x0172	/* Motorola MVME172 */
-#define VME_TYPE_MVME177	0x0177	/* Motorola MVME177 */
-#define VME_TYPE_BVME4000	0x4000	/* BVM Ltd. BVME4000 */
-#define VME_TYPE_BVME6000	0x6000	/* BVM Ltd. BVME6000 */
-
-/* BI_VME_BRDINFO is a 32 byte struct as returned by the Bug code on
- * Motorola VME boards.  Contains board number, Bug version, board
- * configuration options, etc.  See include/asm/mvme16xhw.h for details.
- */
-
-
-    /*
-     *  Macintosh-specific tags (all u_long)
-     */
-
-#define BI_MAC_MODEL		0x8000	/* Mac Gestalt ID (model type) */
-#define BI_MAC_VADDR		0x8001	/* Mac video base address */
-#define BI_MAC_VDEPTH		0x8002	/* Mac video depth */
-#define BI_MAC_VROW		0x8003	/* Mac video rowbytes */
-#define BI_MAC_VDIM		0x8004	/* Mac video dimensions */
-#define BI_MAC_VLOGICAL		0x8005	/* Mac video logical base */
-#define BI_MAC_SCCBASE		0x8006	/* Mac SCC base address */
-#define BI_MAC_BTIME		0x8007	/* Mac boot time */
-#define BI_MAC_GMTBIAS		0x8008	/* Mac GMT timezone offset */
-#define BI_MAC_MEMSIZE		0x8009	/* Mac RAM size (sanity check) */
-#define BI_MAC_CPUID		0x800a	/* Mac CPU type (sanity check) */
-#define BI_MAC_ROMBASE		0x800b	/* Mac system ROM base address */
-
-    /*
-     *  Macintosh hardware profile data - unused, see macintosh.h for
-     *  resonable type values
-     */
-
-#define BI_MAC_VIA1BASE		0x8010	/* Mac VIA1 base address (always present) */
-#define BI_MAC_VIA2BASE		0x8011	/* Mac VIA2 base address (type varies) */
-#define BI_MAC_VIA2TYPE		0x8012	/* Mac VIA2 type (VIA, RBV, OSS) */
-#define BI_MAC_ADBTYPE		0x8013	/* Mac ADB interface type */
-#define BI_MAC_ASCBASE		0x8014	/* Mac Apple Sound Chip base address */
-#define BI_MAC_SCSI5380		0x8015	/* Mac NCR 5380 SCSI (base address, multi) */
-#define BI_MAC_SCSIDMA		0x8016	/* Mac SCSI DMA (base address) */
-#define BI_MAC_SCSI5396		0x8017	/* Mac NCR 53C96 SCSI (base address, multi) */
-#define BI_MAC_IDETYPE		0x8018	/* Mac IDE interface type */
-#define BI_MAC_IDEBASE		0x8019	/* Mac IDE interface base address */
-#define BI_MAC_NUBUS		0x801a	/* Mac Nubus type (none, regular, pseudo) */
-#define BI_MAC_SLOTMASK		0x801b	/* Mac Nubus slots present */
-#define BI_MAC_SCCTYPE		0x801c	/* Mac SCC serial type (normal, IOP) */
-#define BI_MAC_ETHTYPE		0x801d	/* Mac builtin ethernet type (Sonic, MACE */
-#define BI_MAC_ETHBASE		0x801e	/* Mac builtin ethernet base address */
-#define BI_MAC_PMU		0x801f	/* Mac power management / poweroff hardware */
-#define BI_MAC_IOP_SWIM		0x8020	/* Mac SWIM floppy IOP */
-#define BI_MAC_IOP_ADB		0x8021	/* Mac ADB IOP */
-
-    /*
-     * Mac: compatibility with old booter data format (temporarily)
-     * Fields unused with the new bootinfo can be deleted now; instead of
-     * adding new fields the struct might be splitted into a hardware address
-     * part and a hardware type part
-     */
-
-#ifndef __ASSEMBLY__
-
-struct mac_booter_data
-{
-	unsigned long videoaddr;
-	unsigned long videorow;
-	unsigned long videodepth;
-	unsigned long dimensions;
-	unsigned long args;
-	unsigned long boottime;
-	unsigned long gmtbias;
-	unsigned long bootver;
-	unsigned long videological;
-	unsigned long sccbase;
-	unsigned long id;
-	unsigned long memsize;
-	unsigned long serialmf;
-	unsigned long serialhsk;
-	unsigned long serialgpi;
-	unsigned long printmf;
-	unsigned long printhsk;
-	unsigned long printgpi;
-	unsigned long cpuid;
-	unsigned long rombase;
-	unsigned long adbdelay;
-	unsigned long timedbra;
-};
-
-extern struct mac_booter_data
-	mac_bi_data;
-
-#endif
-
-    /*
-     *  Apollo-specific tags
-     */
-
-#define BI_APOLLO_MODEL         0x8000  /* model (u_long) */
-
-    /*
-     *  HP300-specific tags
-     */
-
-#define BI_HP300_MODEL		0x8000	/* model (u_long) */
-#define BI_HP300_UART_SCODE	0x8001	/* UART select code (u_long) */
-#define BI_HP300_UART_ADDR	0x8002	/* phys. addr of UART (u_long) */
-
-    /*
-     * Stuff for bootinfo interface versioning
-     *
-     * At the start of kernel code, a 'struct bootversion' is located.
-     * bootstrap checks for a matching version of the interface before booting
-     * a kernel, to avoid user confusion if kernel and bootstrap don't work
-     * together :-)
-     *
-     * If incompatible changes are made to the bootinfo interface, the major
-     * number below should be stepped (and the minor reset to 0) for the
-     * appropriate machine. If a change is backward-compatible, the minor
-     * should be stepped. "Backwards-compatible" means that booting will work,
-     * but certain features may not.
-     */
-
-#define BOOTINFOV_MAGIC			0x4249561A	/* 'BIV^Z' */
-#define MK_BI_VERSION(major,minor)	(((major)<<16)+(minor))
-#define BI_VERSION_MAJOR(v)		(((v) >> 16) & 0xffff)
-#define BI_VERSION_MINOR(v)		((v) & 0xffff)
-
-#ifndef __ASSEMBLY__
-
-struct bootversion {
-    unsigned short branch;
-    unsigned long magic;
-    struct {
-	unsigned long machtype;
-	unsigned long version;
-    } machversions[0];
-};
-
-#endif /* __ASSEMBLY__ */
-
-#define AMIGA_BOOTI_VERSION    MK_BI_VERSION( 2, 0 )
-#define ATARI_BOOTI_VERSION    MK_BI_VERSION( 2, 1 )
-#define MAC_BOOTI_VERSION      MK_BI_VERSION( 2, 0 )
-#define MVME147_BOOTI_VERSION  MK_BI_VERSION( 2, 0 )
-#define MVME16x_BOOTI_VERSION  MK_BI_VERSION( 2, 0 )
-#define BVME6000_BOOTI_VERSION MK_BI_VERSION( 2, 0 )
-#define Q40_BOOTI_VERSION      MK_BI_VERSION( 2, 0 )
-#define HP300_BOOTI_VERSION    MK_BI_VERSION( 2, 0 )
-
-#ifdef BOOTINFO_COMPAT_1_0
-
-    /*
-     *  Backwards compatibility with bootinfo interface version 1.0
-     */
-
-#define COMPAT_AMIGA_BOOTI_VERSION    MK_BI_VERSION( 1, 0 )
-#define COMPAT_ATARI_BOOTI_VERSION    MK_BI_VERSION( 1, 0 )
-#define COMPAT_MAC_BOOTI_VERSION      MK_BI_VERSION( 1, 0 )
-
-#include <linux/zorro.h>
-
-#define COMPAT_NUM_AUTO    16
-
-struct compat_bi_Amiga {
-    int model;
-    int num_autocon;
-    struct ConfigDev autocon[COMPAT_NUM_AUTO];
-    unsigned long chip_size;
-    unsigned char vblank;
-    unsigned char psfreq;
-    unsigned long eclock;
-    unsigned long chipset;
-    unsigned long hw_present;
-};
-
-struct compat_bi_Atari {
-    unsigned long hw_present;
-    unsigned long mch_cookie;
-};
-
-#ifndef __ASSEMBLY__
-
-struct compat_bi_Macintosh
-{
-	unsigned long videoaddr;
-	unsigned long videorow;
-	unsigned long videodepth;
-	unsigned long dimensions;
-	unsigned long args;
-	unsigned long boottime;
-	unsigned long gmtbias;
-	unsigned long bootver;
-	unsigned long videological;
-	unsigned long sccbase;
-	unsigned long id;
-	unsigned long memsize;
-	unsigned long serialmf;
-	unsigned long serialhsk;
-	unsigned long serialgpi;
-	unsigned long printmf;
-	unsigned long printhsk;
-	unsigned long printgpi;
-	unsigned long cpuid;
-	unsigned long rombase;
-	unsigned long adbdelay;
-	unsigned long timedbra;
-};
-
-#endif
-
-struct compat_mem_info {
-    unsigned long addr;
-    unsigned long size;
-};
-
-#define COMPAT_NUM_MEMINFO  4
-
-#define COMPAT_CPUB_68020 0
-#define COMPAT_CPUB_68030 1
-#define COMPAT_CPUB_68040 2
-#define COMPAT_CPUB_68060 3
-#define COMPAT_FPUB_68881 5
-#define COMPAT_FPUB_68882 6
-#define COMPAT_FPUB_68040 7
-#define COMPAT_FPUB_68060 8
-
-#define COMPAT_CPU_68020    (1<<COMPAT_CPUB_68020)
-#define COMPAT_CPU_68030    (1<<COMPAT_CPUB_68030)
-#define COMPAT_CPU_68040    (1<<COMPAT_CPUB_68040)
-#define COMPAT_CPU_68060    (1<<COMPAT_CPUB_68060)
-#define COMPAT_CPU_MASK     (31)
-#define COMPAT_FPU_68881    (1<<COMPAT_FPUB_68881)
-#define COMPAT_FPU_68882    (1<<COMPAT_FPUB_68882)
-#define COMPAT_FPU_68040    (1<<COMPAT_FPUB_68040)
-#define COMPAT_FPU_68060    (1<<COMPAT_FPUB_68060)
-#define COMPAT_FPU_MASK     (0xfe0)
-
-#define COMPAT_CL_SIZE      (256)
-
-struct compat_bootinfo {
-    unsigned long machtype;
-    unsigned long cputype;
-    struct compat_mem_info memory[COMPAT_NUM_MEMINFO];
-    int num_memory;
-    unsigned long ramdisk_size;
-    unsigned long ramdisk_addr;
-    char command_line[COMPAT_CL_SIZE];
-    union {
-	struct compat_bi_Amiga     bi_ami;
-	struct compat_bi_Atari     bi_ata;
-	struct compat_bi_Macintosh bi_mac;
-    } bi_un;
-};
-
-#define bi_amiga	bi_un.bi_ami
-#define bi_atari	bi_un.bi_ata
-#define bi_mac		bi_un.bi_mac
-
-#endif /* BOOTINFO_COMPAT_1_0 */
-
-
-#endif /* _M68K_BOOTINFO_H */
diff --git a/arch/m68k/include/asm/bootinfo_no.h b/arch/m68k/include/asm/bootinfo_no.h
deleted file mode 100644
index c12e526..0000000
--- a/arch/m68k/include/asm/bootinfo_no.h
+++ /dev/null
@@ -1,2 +0,0 @@
-
-/* Nothing for m68knommu */
diff --git a/arch/m68k/include/asm/bug.h b/arch/m68k/include/asm/bug.h
index 997e094..ef9a2e4 100644
--- a/arch/m68k/include/asm/bug.h
+++ b/arch/m68k/include/asm/bug.h
@@ -1,5 +1,30 @@
-#ifdef __uClinux__
-#include "bug_no.h"
+#ifndef _M68K_BUG_H
+#define _M68K_BUG_H
+
+#ifdef CONFIG_MMU
+#ifdef CONFIG_BUG
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+#ifndef CONFIG_SUN3
+#define BUG() do { \
+	printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+	__builtin_trap(); \
+} while (0)
 #else
-#include "bug_mm.h"
+#define BUG() do { \
+	printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+	panic("BUG!"); \
+} while (0)
+#endif /* !CONFIG_SUN3 */
+#else /* !CONFIG_DEBUG_BUGVERBOSE */
+#define BUG() do { \
+	__builtin_trap(); \
+} while (0)
+#endif /* CONFIG_DEBUG_BUGVERBOSE */
+
+#define HAVE_ARCH_BUG
+#endif /* CONFIG_BUG */
+#endif /* CONFIG_MMU */
+
+#include <asm-generic/bug.h>
+
 #endif
diff --git a/arch/m68k/include/asm/bug_mm.h b/arch/m68k/include/asm/bug_mm.h
deleted file mode 100644
index e5b528d..0000000
--- a/arch/m68k/include/asm/bug_mm.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef _M68K_BUG_H
-#define _M68K_BUG_H
-
-
-#ifdef CONFIG_BUG
-#ifdef CONFIG_DEBUG_BUGVERBOSE
-#ifndef CONFIG_SUN3
-#define BUG() do { \
-	printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
-	__builtin_trap(); \
-} while (0)
-#else
-#define BUG() do { \
-	printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
-	panic("BUG!"); \
-} while (0)
-#endif
-#else
-#define BUG() do { \
-	__builtin_trap(); \
-} while (0)
-#endif
-
-#define HAVE_ARCH_BUG
-#endif
-
-#include <asm-generic/bug.h>
-
-#endif
diff --git a/arch/m68k/include/asm/bug_no.h b/arch/m68k/include/asm/bug_no.h
deleted file mode 100644
index 70e7dc0..0000000
--- a/arch/m68k/include/asm/bug_no.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef _M68KNOMMU_BUG_H
-#define _M68KNOMMU_BUG_H
-#include <asm-generic/bug.h>
-#endif
diff --git a/arch/m68k/include/asm/bugs.h b/arch/m68k/include/asm/bugs.h
index 01f047d..d06207b 100644
--- a/arch/m68k/include/asm/bugs.h
+++ b/arch/m68k/include/asm/bugs.h
@@ -1,5 +1,20 @@
-#ifdef __uClinux__
-#include "bugs_no.h"
+/*
+ *  include/asm-m68k/bugs.h
+ *
+ *  Copyright (C) 1994  Linus Torvalds
+ */
+
+/*
+ * This is included by init/main.c to check for architecture-dependent bugs.
+ *
+ * Needs:
+ *	void check_bugs(void);
+ */
+
+#ifdef CONFIG_MMU
+extern void check_bugs(void);	/* in arch/m68k/kernel/setup.c */
 #else
-#include "bugs_mm.h"
+static void check_bugs(void)
+{
+}
 #endif
diff --git a/arch/m68k/include/asm/bugs_mm.h b/arch/m68k/include/asm/bugs_mm.h
deleted file mode 100644
index d019355..0000000
--- a/arch/m68k/include/asm/bugs_mm.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- *  include/asm-m68k/bugs.h
- *
- *  Copyright (C) 1994  Linus Torvalds
- */
-
-/*
- * This is included by init/main.c to check for architecture-dependent bugs.
- *
- * Needs:
- *	void check_bugs(void);
- */
-
-extern void check_bugs(void);	/* in arch/m68k/kernel/setup.c */
diff --git a/arch/m68k/include/asm/bugs_no.h b/arch/m68k/include/asm/bugs_no.h
deleted file mode 100644
index 5f382da..0000000
--- a/arch/m68k/include/asm/bugs_no.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- *  include/asm-m68k/bugs.h
- *
- *  Copyright (C) 1994  Linus Torvalds
- */
-
-/*
- * This is included by init/main.c to check for architecture-dependent bugs.
- *
- * Needs:
- *	void check_bugs(void);
- */
-
-static void check_bugs(void)
-{
-}
diff --git a/arch/m68k/include/asm/cache.h b/arch/m68k/include/asm/cache.h
index 599c29b..fed3fd3 100644
--- a/arch/m68k/include/asm/cache.h
+++ b/arch/m68k/include/asm/cache.h
@@ -1,5 +1,11 @@
-#ifdef __uClinux__
-#include "cache_no.h"
-#else
-#include "cache_mm.h"
+/*
+ * include/asm-m68k/cache.h
+ */
+#ifndef __ARCH_M68K_CACHE_H
+#define __ARCH_M68K_CACHE_H
+
+/* bytes per L1 cache line */
+#define        L1_CACHE_SHIFT  4
+#define        L1_CACHE_BYTES  (1<< L1_CACHE_SHIFT)
+
 #endif
diff --git a/arch/m68k/include/asm/cache_mm.h b/arch/m68k/include/asm/cache_mm.h
deleted file mode 100644
index fed3fd3..0000000
--- a/arch/m68k/include/asm/cache_mm.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- * include/asm-m68k/cache.h
- */
-#ifndef __ARCH_M68K_CACHE_H
-#define __ARCH_M68K_CACHE_H
-
-/* bytes per L1 cache line */
-#define        L1_CACHE_SHIFT  4
-#define        L1_CACHE_BYTES  (1<< L1_CACHE_SHIFT)
-
-#endif
diff --git a/arch/m68k/include/asm/cache_no.h b/arch/m68k/include/asm/cache_no.h
deleted file mode 100644
index 24e9eac..0000000
--- a/arch/m68k/include/asm/cache_no.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef __ARCH_M68KNOMMU_CACHE_H
-#define __ARCH_M68KNOMMU_CACHE_H
-
-/* bytes per L1 cache line */
-#define        L1_CACHE_BYTES  16	/* this need to be at least 1 */
-
-/* m68k-elf-gcc  2.95.2 doesn't like these */
-
-#define __cacheline_aligned
-#define ____cacheline_aligned
-
-#endif
diff --git a/arch/m68k/include/asm/current.h b/arch/m68k/include/asm/current.h
index 51b056d..91fcc53 100644
--- a/arch/m68k/include/asm/current.h
+++ b/arch/m68k/include/asm/current.h
@@ -1,5 +1,28 @@
-#ifdef __uClinux__
-#include "current_no.h"
+#ifndef _M68K_CURRENT_H
+#define _M68K_CURRENT_H
+
+#ifdef CONFIG_MMU
+
+register struct task_struct *current __asm__("%a2");
+
 #else
-#include "current_mm.h"
-#endif
+
+/*
+ *	Rather than dedicate a register (as the m68k source does), we
+ *	just keep a global,  we should probably just change it all to be
+ *	current and lose _current_task.
+ */
+#include <linux/thread_info.h>
+
+struct task_struct;
+
+static inline struct task_struct *get_current(void)
+{
+	return(current_thread_info()->task);
+}
+
+#define	current	get_current()
+
+#endif /* CONFIG_MMU */
+
+#endif /* !(_M68K_CURRENT_H) */
diff --git a/arch/m68k/include/asm/current_mm.h b/arch/m68k/include/asm/current_mm.h
deleted file mode 100644
index 8de8f8c..0000000
--- a/arch/m68k/include/asm/current_mm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _M68K_CURRENT_H
-#define _M68K_CURRENT_H
-
-register struct task_struct *current __asm__("%a2");
-
-#endif /* !(_M68K_CURRENT_H) */
diff --git a/arch/m68k/include/asm/current_no.h b/arch/m68k/include/asm/current_no.h
deleted file mode 100644
index 53ee0f9..0000000
--- a/arch/m68k/include/asm/current_no.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef _M68KNOMMU_CURRENT_H
-#define _M68KNOMMU_CURRENT_H
-/*
- *	current.h
- *	(C) Copyright 2000, Lineo, David McCullough <davidm@uclinux.org>
- *	(C) Copyright 2002, Greg Ungerer (gerg@snapgear.com)
- *
- *	rather than dedicate a register (as the m68k source does), we
- *	just keep a global,  we should probably just change it all to be
- *	current and lose _current_task.
- */
-
-#include <linux/thread_info.h>
-
-struct task_struct;
-
-static inline struct task_struct *get_current(void)
-{
-	return(current_thread_info()->task);
-}
-
-#define	current	get_current()
-
-#endif /* _M68KNOMMU_CURRENT_H */
diff --git a/arch/m68k/include/asm/div64.h b/arch/m68k/include/asm/div64.h
index d211d9f..edb6614 100644
--- a/arch/m68k/include/asm/div64.h
+++ b/arch/m68k/include/asm/div64.h
@@ -1,5 +1,34 @@
-#ifdef __uClinux__
-#include "div64_no.h"
+#ifndef _M68K_DIV64_H
+#define _M68K_DIV64_H
+
+#ifdef CONFIG_MMU
+
+#include <linux/types.h>
+
+/* n = n / base; return rem; */
+
+#define do_div(n, base) ({					\
+	union {							\
+		unsigned long n32[2];				\
+		unsigned long long n64;				\
+	} __n;							\
+	unsigned long __rem, __upper;				\
+								\
+	__n.n64 = (n);						\
+	if ((__upper = __n.n32[0])) {				\
+		asm ("divul.l %2,%1:%0"				\
+			: "=d" (__n.n32[0]), "=d" (__upper)	\
+			: "d" (base), "0" (__n.n32[0]));	\
+	}							\
+	asm ("divu.l %2,%1:%0"					\
+		: "=d" (__n.n32[1]), "=d" (__rem)		\
+		: "d" (base), "1" (__upper), "0" (__n.n32[1]));	\
+	(n) = __n.n64;						\
+	__rem;							\
+})
+
 #else
-#include "div64_mm.h"
-#endif
+#include <asm-generic/div64.h>
+#endif /* CONFIG_MMU */
+
+#endif /* _M68K_DIV64_H */
diff --git a/arch/m68k/include/asm/div64_mm.h b/arch/m68k/include/asm/div64_mm.h
deleted file mode 100644
index 8243c93..0000000
--- a/arch/m68k/include/asm/div64_mm.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef _M68K_DIV64_H
-#define _M68K_DIV64_H
-
-#include <linux/types.h>
-
-/* n = n / base; return rem; */
-
-#define do_div(n, base) ({					\
-	union {							\
-		unsigned long n32[2];				\
-		unsigned long long n64;				\
-	} __n;							\
-	unsigned long __rem, __upper;				\
-								\
-	__n.n64 = (n);						\
-	if ((__upper = __n.n32[0])) {				\
-		asm ("divul.l %2,%1:%0"				\
-			: "=d" (__n.n32[0]), "=d" (__upper)	\
-			: "d" (base), "0" (__n.n32[0]));	\
-	}							\
-	asm ("divu.l %2,%1:%0"					\
-		: "=d" (__n.n32[1]), "=d" (__rem)		\
-		: "d" (base), "1" (__upper), "0" (__n.n32[1]));	\
-	(n) = __n.n64;						\
-	__rem;							\
-})
-
-#endif /* _M68K_DIV64_H */
diff --git a/arch/m68k/include/asm/div64_no.h b/arch/m68k/include/asm/div64_no.h
deleted file mode 100644
index 6cd978c..0000000
--- a/arch/m68k/include/asm/div64_no.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/arch/m68k/include/asm/dma-mapping.h b/arch/m68k/include/asm/dma-mapping.h
index f4a4c76..26f5054 100644
--- a/arch/m68k/include/asm/dma-mapping.h
+++ b/arch/m68k/include/asm/dma-mapping.h
@@ -1,5 +1,112 @@
-#ifdef __uClinux__
-#include "dma-mapping_no.h"
+#ifndef _M68K_DMA_MAPPING_H
+#define _M68K_DMA_MAPPING_H
+
+#include <asm/cache.h>
+
+struct scatterlist;
+
+#ifndef CONFIG_MMU_SUN3
+static inline int dma_supported(struct device *dev, u64 mask)
+{
+	return 1;
+}
+
+static inline int dma_set_mask(struct device *dev, u64 mask)
+{
+	return 0;
+}
+
+static inline int dma_get_cache_alignment(void)
+{
+	return 1 << L1_CACHE_SHIFT;
+}
+
+static inline int dma_is_consistent(struct device *dev, dma_addr_t dma_addr)
+{
+	return 0;
+}
+
+extern void *dma_alloc_coherent(struct device *, size_t,
+				dma_addr_t *, gfp_t);
+extern void dma_free_coherent(struct device *, size_t,
+			      void *, dma_addr_t);
+
+static inline void *dma_alloc_noncoherent(struct device *dev, size_t size,
+					  dma_addr_t *handle, gfp_t flag)
+{
+	return dma_alloc_coherent(dev, size, handle, flag);
+}
+static inline void dma_free_noncoherent(struct device *dev, size_t size,
+					void *addr, dma_addr_t handle)
+{
+	dma_free_coherent(dev, size, addr, handle);
+}
+static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+				  enum dma_data_direction dir)
+{
+	/* we use coherent allocation, so not much to do here. */
+}
+
+extern dma_addr_t dma_map_single(struct device *, void *, size_t,
+				 enum dma_data_direction);
+static inline void dma_unmap_single(struct device *dev, dma_addr_t addr,
+				    size_t size, enum dma_data_direction dir)
+{
+}
+
+extern dma_addr_t dma_map_page(struct device *, struct page *,
+			       unsigned long, size_t size,
+			       enum dma_data_direction);
+static inline void dma_unmap_page(struct device *dev, dma_addr_t address,
+				  size_t size, enum dma_data_direction dir)
+{
+}
+
+extern int dma_map_sg(struct device *, struct scatterlist *, int,
+		      enum dma_data_direction);
+static inline void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+				int nhwentries, enum dma_data_direction dir)
+{
+}
+
+extern void dma_sync_single_for_device(struct device *, dma_addr_t, size_t,
+				       enum dma_data_direction);
+extern void dma_sync_sg_for_device(struct device *, struct scatterlist *, int,
+				   enum dma_data_direction);
+
+static inline void dma_sync_single_range_for_device(struct device *dev,
+		dma_addr_t dma_handle, unsigned long offset, size_t size,
+		enum dma_data_direction direction)
+{
+	/* just sync everything for now */
+	dma_sync_single_for_device(dev, dma_handle, offset + size, direction);
+}
+
+static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
+					   size_t size, enum dma_data_direction dir)
+{
+}
+
+static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+				       int nents, enum dma_data_direction dir)
+{
+}
+
+static inline void dma_sync_single_range_for_cpu(struct device *dev,
+		dma_addr_t dma_handle, unsigned long offset, size_t size,
+		enum dma_data_direction direction)
+{
+	/* just sync everything for now */
+	dma_sync_single_for_cpu(dev, dma_handle, offset + size, direction);
+}
+
+static inline int dma_mapping_error(struct device *dev, dma_addr_t handle)
+{
+	return 0;
+}
+
 #else
-#include "dma-mapping_mm.h"
+#include <asm-generic/dma-mapping-broken.h>
 #endif
+
+#endif  /* _M68K_DMA_MAPPING_H */
diff --git a/arch/m68k/include/asm/dma-mapping_mm.h b/arch/m68k/include/asm/dma-mapping_mm.h
deleted file mode 100644
index 26f5054..0000000
--- a/arch/m68k/include/asm/dma-mapping_mm.h
+++ /dev/null
@@ -1,112 +0,0 @@
-#ifndef _M68K_DMA_MAPPING_H
-#define _M68K_DMA_MAPPING_H
-
-#include <asm/cache.h>
-
-struct scatterlist;
-
-#ifndef CONFIG_MMU_SUN3
-static inline int dma_supported(struct device *dev, u64 mask)
-{
-	return 1;
-}
-
-static inline int dma_set_mask(struct device *dev, u64 mask)
-{
-	return 0;
-}
-
-static inline int dma_get_cache_alignment(void)
-{
-	return 1 << L1_CACHE_SHIFT;
-}
-
-static inline int dma_is_consistent(struct device *dev, dma_addr_t dma_addr)
-{
-	return 0;
-}
-
-extern void *dma_alloc_coherent(struct device *, size_t,
-				dma_addr_t *, gfp_t);
-extern void dma_free_coherent(struct device *, size_t,
-			      void *, dma_addr_t);
-
-static inline void *dma_alloc_noncoherent(struct device *dev, size_t size,
-					  dma_addr_t *handle, gfp_t flag)
-{
-	return dma_alloc_coherent(dev, size, handle, flag);
-}
-static inline void dma_free_noncoherent(struct device *dev, size_t size,
-					void *addr, dma_addr_t handle)
-{
-	dma_free_coherent(dev, size, addr, handle);
-}
-static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
-				  enum dma_data_direction dir)
-{
-	/* we use coherent allocation, so not much to do here. */
-}
-
-extern dma_addr_t dma_map_single(struct device *, void *, size_t,
-				 enum dma_data_direction);
-static inline void dma_unmap_single(struct device *dev, dma_addr_t addr,
-				    size_t size, enum dma_data_direction dir)
-{
-}
-
-extern dma_addr_t dma_map_page(struct device *, struct page *,
-			       unsigned long, size_t size,
-			       enum dma_data_direction);
-static inline void dma_unmap_page(struct device *dev, dma_addr_t address,
-				  size_t size, enum dma_data_direction dir)
-{
-}
-
-extern int dma_map_sg(struct device *, struct scatterlist *, int,
-		      enum dma_data_direction);
-static inline void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
-				int nhwentries, enum dma_data_direction dir)
-{
-}
-
-extern void dma_sync_single_for_device(struct device *, dma_addr_t, size_t,
-				       enum dma_data_direction);
-extern void dma_sync_sg_for_device(struct device *, struct scatterlist *, int,
-				   enum dma_data_direction);
-
-static inline void dma_sync_single_range_for_device(struct device *dev,
-		dma_addr_t dma_handle, unsigned long offset, size_t size,
-		enum dma_data_direction direction)
-{
-	/* just sync everything for now */
-	dma_sync_single_for_device(dev, dma_handle, offset + size, direction);
-}
-
-static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
-					   size_t size, enum dma_data_direction dir)
-{
-}
-
-static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
-				       int nents, enum dma_data_direction dir)
-{
-}
-
-static inline void dma_sync_single_range_for_cpu(struct device *dev,
-		dma_addr_t dma_handle, unsigned long offset, size_t size,
-		enum dma_data_direction direction)
-{
-	/* just sync everything for now */
-	dma_sync_single_for_cpu(dev, dma_handle, offset + size, direction);
-}
-
-static inline int dma_mapping_error(struct device *dev, dma_addr_t handle)
-{
-	return 0;
-}
-
-#else
-#include <asm-generic/dma-mapping-broken.h>
-#endif
-
-#endif  /* _M68K_DMA_MAPPING_H */
diff --git a/arch/m68k/include/asm/dma-mapping_no.h b/arch/m68k/include/asm/dma-mapping_no.h
deleted file mode 100644
index 1748f2b..0000000
--- a/arch/m68k/include/asm/dma-mapping_no.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _M68KNOMMU_DMA_MAPPING_H
-#define _M68KNOMMU_DMA_MAPPING_H
-
-#include <asm-generic/dma-mapping-broken.h>
-
-#endif  /* _M68KNOMMU_DMA_MAPPING_H */
diff --git a/arch/m68k/include/asm/elf.h b/arch/m68k/include/asm/elf.h
index 04ce488..0b0f49e 100644
--- a/arch/m68k/include/asm/elf.h
+++ b/arch/m68k/include/asm/elf.h
@@ -1,5 +1,119 @@
-#ifdef __uClinux__
-#include "elf_no.h"
+#ifndef __ASMm68k_ELF_H
+#define __ASMm68k_ELF_H
+
+/*
+ * ELF register definitions..
+ */
+
+#include <asm/ptrace.h>
+#include <asm/user.h>
+
+/*
+ * 68k ELF relocation types
+ */
+#define R_68K_NONE	0
+#define R_68K_32	1
+#define R_68K_16	2
+#define R_68K_8		3
+#define R_68K_PC32	4
+#define R_68K_PC16	5
+#define R_68K_PC8	6
+#define R_68K_GOT32	7
+#define R_68K_GOT16	8
+#define R_68K_GOT8	9
+#define R_68K_GOT32O	10
+#define R_68K_GOT16O	11
+#define R_68K_GOT8O	12
+#define R_68K_PLT32	13
+#define R_68K_PLT16	14
+#define R_68K_PLT8	15
+#define R_68K_PLT32O	16
+#define R_68K_PLT16O	17
+#define R_68K_PLT8O	18
+#define R_68K_COPY	19
+#define R_68K_GLOB_DAT	20
+#define R_68K_JMP_SLOT	21
+#define R_68K_RELATIVE	22
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+typedef struct user_m68kfp_struct elf_fpregset_t;
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) ((x)->e_machine == EM_68K)
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS	ELFCLASS32
+#define ELF_DATA	ELFDATA2MSB
+#define ELF_ARCH	EM_68K
+
+/* For SVR4/m68k the function pointer to be registered with `atexit' is
+   passed in %a1.  Although my copy of the ABI has no such statement, it
+   is actually used on ASV.  */
+#define ELF_PLAT_INIT(_r, load_addr)	_r->a1 = 0
+
+#define USE_ELF_CORE_DUMP
+#ifndef CONFIG_SUN3
+#define ELF_EXEC_PAGESIZE	4096
 #else
-#include "elf_mm.h"
+#define ELF_EXEC_PAGESIZE	8192
+#endif
+
+/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+   use of this is to invoke "./ld.so someprog" to test out a new version of
+   the loader.  We need to make sure that it is out of the way of the program
+   that it will "exec", and that there is sufficient room for the brk.  */
+
+#ifndef CONFIG_SUN3
+#define ELF_ET_DYN_BASE         0xD0000000UL
+#else
+#define ELF_ET_DYN_BASE         0x0D800000UL
+#endif
+
+#define ELF_CORE_COPY_REGS(pr_reg, regs)				\
+	/* Bleech. */							\
+	pr_reg[0] = regs->d1;						\
+	pr_reg[1] = regs->d2;						\
+	pr_reg[2] = regs->d3;						\
+	pr_reg[3] = regs->d4;						\
+	pr_reg[4] = regs->d5;						\
+	pr_reg[7] = regs->a0;						\
+	pr_reg[8] = regs->a1;						\
+	pr_reg[9] = regs->a2;						\
+	pr_reg[14] = regs->d0;						\
+	pr_reg[15] = rdusp();						\
+	pr_reg[16] = regs->orig_d0;					\
+	pr_reg[17] = regs->sr;						\
+	pr_reg[18] = regs->pc;						\
+	pr_reg[19] = (regs->format << 12) | regs->vector;		\
+	{								\
+	  struct switch_stack *sw = ((struct switch_stack *)regs) - 1;	\
+	  pr_reg[5] = sw->d6;						\
+	  pr_reg[6] = sw->d7;						\
+	  pr_reg[10] = sw->a3;						\
+	  pr_reg[11] = sw->a4;						\
+	  pr_reg[12] = sw->a5;						\
+	  pr_reg[13] = sw->a6;						\
+	}
+
+/* This yields a mask that user programs can use to figure out what
+   instruction set this cpu supports.  */
+
+#define ELF_HWCAP	(0)
+
+/* This yields a string that ld.so will use to load implementation
+   specific libraries for optimization.  This is more specific in
+   intent than poking at uname or /proc/cpuinfo.  */
+
+#define ELF_PLATFORM  (NULL)
+
+#define SET_PERSONALITY(ex) set_personality(PER_LINUX)
+
 #endif
diff --git a/arch/m68k/include/asm/elf_mm.h b/arch/m68k/include/asm/elf_mm.h
deleted file mode 100644
index 0b0f49e..0000000
--- a/arch/m68k/include/asm/elf_mm.h
+++ /dev/null
@@ -1,119 +0,0 @@
-#ifndef __ASMm68k_ELF_H
-#define __ASMm68k_ELF_H
-
-/*
- * ELF register definitions..
- */
-
-#include <asm/ptrace.h>
-#include <asm/user.h>
-
-/*
- * 68k ELF relocation types
- */
-#define R_68K_NONE	0
-#define R_68K_32	1
-#define R_68K_16	2
-#define R_68K_8		3
-#define R_68K_PC32	4
-#define R_68K_PC16	5
-#define R_68K_PC8	6
-#define R_68K_GOT32	7
-#define R_68K_GOT16	8
-#define R_68K_GOT8	9
-#define R_68K_GOT32O	10
-#define R_68K_GOT16O	11
-#define R_68K_GOT8O	12
-#define R_68K_PLT32	13
-#define R_68K_PLT16	14
-#define R_68K_PLT8	15
-#define R_68K_PLT32O	16
-#define R_68K_PLT16O	17
-#define R_68K_PLT8O	18
-#define R_68K_COPY	19
-#define R_68K_GLOB_DAT	20
-#define R_68K_JMP_SLOT	21
-#define R_68K_RELATIVE	22
-
-typedef unsigned long elf_greg_t;
-
-#define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t))
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-typedef struct user_m68kfp_struct elf_fpregset_t;
-
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- */
-#define elf_check_arch(x) ((x)->e_machine == EM_68K)
-
-/*
- * These are used to set parameters in the core dumps.
- */
-#define ELF_CLASS	ELFCLASS32
-#define ELF_DATA	ELFDATA2MSB
-#define ELF_ARCH	EM_68K
-
-/* For SVR4/m68k the function pointer to be registered with `atexit' is
-   passed in %a1.  Although my copy of the ABI has no such statement, it
-   is actually used on ASV.  */
-#define ELF_PLAT_INIT(_r, load_addr)	_r->a1 = 0
-
-#define USE_ELF_CORE_DUMP
-#ifndef CONFIG_SUN3
-#define ELF_EXEC_PAGESIZE	4096
-#else
-#define ELF_EXEC_PAGESIZE	8192
-#endif
-
-/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
-   use of this is to invoke "./ld.so someprog" to test out a new version of
-   the loader.  We need to make sure that it is out of the way of the program
-   that it will "exec", and that there is sufficient room for the brk.  */
-
-#ifndef CONFIG_SUN3
-#define ELF_ET_DYN_BASE         0xD0000000UL
-#else
-#define ELF_ET_DYN_BASE         0x0D800000UL
-#endif
-
-#define ELF_CORE_COPY_REGS(pr_reg, regs)				\
-	/* Bleech. */							\
-	pr_reg[0] = regs->d1;						\
-	pr_reg[1] = regs->d2;						\
-	pr_reg[2] = regs->d3;						\
-	pr_reg[3] = regs->d4;						\
-	pr_reg[4] = regs->d5;						\
-	pr_reg[7] = regs->a0;						\
-	pr_reg[8] = regs->a1;						\
-	pr_reg[9] = regs->a2;						\
-	pr_reg[14] = regs->d0;						\
-	pr_reg[15] = rdusp();						\
-	pr_reg[16] = regs->orig_d0;					\
-	pr_reg[17] = regs->sr;						\
-	pr_reg[18] = regs->pc;						\
-	pr_reg[19] = (regs->format << 12) | regs->vector;		\
-	{								\
-	  struct switch_stack *sw = ((struct switch_stack *)regs) - 1;	\
-	  pr_reg[5] = sw->d6;						\
-	  pr_reg[6] = sw->d7;						\
-	  pr_reg[10] = sw->a3;						\
-	  pr_reg[11] = sw->a4;						\
-	  pr_reg[12] = sw->a5;						\
-	  pr_reg[13] = sw->a6;						\
-	}
-
-/* This yields a mask that user programs can use to figure out what
-   instruction set this cpu supports.  */
-
-#define ELF_HWCAP	(0)
-
-/* This yields a string that ld.so will use to load implementation
-   specific libraries for optimization.  This is more specific in
-   intent than poking at uname or /proc/cpuinfo.  */
-
-#define ELF_PLATFORM  (NULL)
-
-#define SET_PERSONALITY(ex) set_personality(PER_LINUX)
-
-#endif
diff --git a/arch/m68k/include/asm/elf_no.h b/arch/m68k/include/asm/elf_no.h
deleted file mode 100644
index b804683..0000000
--- a/arch/m68k/include/asm/elf_no.h
+++ /dev/null
@@ -1,110 +0,0 @@
-#ifndef __ASMm68k_ELF_H
-#define __ASMm68k_ELF_H
-
-/*
- * ELF register definitions..
- */
-
-#include <asm/ptrace.h>
-#include <asm/user.h>
-
-/*
- * 68k ELF relocation types
- */
-#define R_68K_NONE  0
-#define R_68K_32    1
-#define R_68K_16    2
-#define R_68K_8     3
-#define R_68K_PC32  4
-#define R_68K_PC16  5
-#define R_68K_PC8   6
-#define R_68K_GOT32 7
-#define R_68K_GOT16 8
-#define R_68K_GOT8  9
-#define R_68K_GOT32O    10
-#define R_68K_GOT16O    11
-#define R_68K_GOT8O 12
-#define R_68K_PLT32 13
-#define R_68K_PLT16 14
-#define R_68K_PLT8  15
-#define R_68K_PLT32O    16
-#define R_68K_PLT16O    17
-#define R_68K_PLT8O 18
-#define R_68K_COPY  19
-#define R_68K_GLOB_DAT  20
-#define R_68K_JMP_SLOT  21
-#define R_68K_RELATIVE  22
-
-typedef unsigned long elf_greg_t;
-
-#define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t))
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-typedef struct user_m68kfp_struct elf_fpregset_t;
-
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- */
-#define elf_check_arch(x) ((x)->e_machine == EM_68K)
-
-/*
- * These are used to set parameters in the core dumps.
- */
-#define ELF_CLASS	ELFCLASS32
-#define ELF_DATA	ELFDATA2MSB
-#define ELF_ARCH	EM_68K
-
-/* For SVR4/m68k the function pointer to be registered with `atexit' is
-   passed in %a1.  Although my copy of the ABI has no such statement, it
-   is actually used on ASV.  */
-#define ELF_PLAT_INIT(_r, load_addr)	_r->a1 = 0
-
-#define USE_ELF_CORE_DUMP
-#define ELF_EXEC_PAGESIZE	4096
-
-/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
-   use of this is to invoke "./ld.so someprog" to test out a new version of
-   the loader.  We need to make sure that it is out of the way of the program
-   that it will "exec", and that there is sufficient room for the brk.  */
-
-#define ELF_ET_DYN_BASE         0xD0000000UL
-
-#define ELF_CORE_COPY_REGS(pr_reg, regs)				\
-	/* Bleech. */							\
-	pr_reg[0] = regs->d1;						\
-	pr_reg[1] = regs->d2;						\
-	pr_reg[2] = regs->d3;						\
-	pr_reg[3] = regs->d4;						\
-	pr_reg[4] = regs->d5;						\
-	pr_reg[7] = regs->a0;						\
-	pr_reg[8] = regs->a1;						\
-	pr_reg[14] = regs->d0;						\
-	pr_reg[15] = rdusp();						\
-	pr_reg[16] = 0 /* regs->orig_d0 */;				\
-	pr_reg[17] = regs->sr;						\
-	pr_reg[18] = regs->pc;						\
-	/* pr_reg[19] = (regs->format << 12) | regs->vector; */		\
-	{								\
-	  struct switch_stack *sw = ((struct switch_stack *)regs) - 1;	\
-	  pr_reg[5] = sw->d6;						\
-	  pr_reg[6] = sw->d7;						\
-	  pr_reg[10] = sw->a3;						\
-	  pr_reg[11] = sw->a4;						\
-	  pr_reg[12] = sw->a5;						\
-	  pr_reg[13] = sw->a6;						\
-	}
-
-/* This yields a mask that user programs can use to figure out what
-   instruction set this cpu supports.  */
-
-#define ELF_HWCAP	(0)
-
-/* This yields a string that ld.so will use to load implementation
-   specific libraries for optimization.  This is more specific in
-   intent than poking at uname or /proc/cpuinfo.  */
-
-#define ELF_PLATFORM  (NULL)
-
-#define SET_PERSONALITY(ex) set_personality(PER_LINUX)
-
-#endif
diff --git a/arch/m68k/include/asm/fb.h b/arch/m68k/include/asm/fb.h
index 97bcaef..be4e4c6 100644
--- a/arch/m68k/include/asm/fb.h
+++ b/arch/m68k/include/asm/fb.h
@@ -1,5 +1,38 @@
-#ifdef __uClinux__
-#include "fb_no.h"
+#ifndef _ASM_FB_H_
+#define _ASM_FB_H_
+
+#include <linux/fb.h>
+#include <linux/fs.h>
+#include <asm/page.h>
+#include <asm/setup.h>
+
+#ifdef CONFIG_MMU
+#ifdef CONFIG_SUN3
+static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
+				unsigned long off)
+{
+	pgprot_val(vma->vm_page_prot) |= SUN3_PAGE_NOCACHE;
+}
 #else
-#include "fb_mm.h"
-#endif
+static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
+				unsigned long off)
+{
+	if (CPU_IS_020_OR_030)
+		pgprot_val(vma->vm_page_prot) |= _PAGE_NOCACHE030;
+	if (CPU_IS_040_OR_060) {
+		pgprot_val(vma->vm_page_prot) &= _CACHEMASK040;
+		/* Use no-cache mode, serialized */
+		pgprot_val(vma->vm_page_prot) |= _PAGE_NOCACHE_S;
+	}
+}
+#endif /* CONFIG_SUN3 */
+#else
+#define fb_pgprotect(...) do {} while (0)
+#endif /* CONFIG_MMU */
+
+static inline int fb_is_primary_device(struct fb_info *info)
+{
+	return 0;
+}
+
+#endif /* _ASM_FB_H_ */
diff --git a/arch/m68k/include/asm/fb_mm.h b/arch/m68k/include/asm/fb_mm.h
deleted file mode 100644
index 380b97a..0000000
--- a/arch/m68k/include/asm/fb_mm.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef _ASM_FB_H_
-#define _ASM_FB_H_
-
-#include <linux/fb.h>
-#include <linux/fs.h>
-#include <asm/page.h>
-#include <asm/setup.h>
-
-#ifdef CONFIG_SUN3
-static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
-				unsigned long off)
-{
-	pgprot_val(vma->vm_page_prot) |= SUN3_PAGE_NOCACHE;
-}
-#else
-static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
-				unsigned long off)
-{
-	if (CPU_IS_020_OR_030)
-		pgprot_val(vma->vm_page_prot) |= _PAGE_NOCACHE030;
-	if (CPU_IS_040_OR_060) {
-		pgprot_val(vma->vm_page_prot) &= _CACHEMASK040;
-		/* Use no-cache mode, serialized */
-		pgprot_val(vma->vm_page_prot) |= _PAGE_NOCACHE_S;
-	}
-}
-#endif /* CONFIG_SUN3 */
-
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-	return 0;
-}
-
-#endif /* _ASM_FB_H_ */
diff --git a/arch/m68k/include/asm/fb_no.h b/arch/m68k/include/asm/fb_no.h
deleted file mode 100644
index c7df380..0000000
--- a/arch/m68k/include/asm/fb_no.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASM_FB_H_
-#define _ASM_FB_H_
-#include <linux/fb.h>
-
-#define fb_pgprotect(...) do {} while (0)
-
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-	return 0;
-}
-
-#endif /* _ASM_FB_H_ */
diff --git a/arch/m68k/include/asm/fpu.h b/arch/m68k/include/asm/fpu.h
index e19bc5e..ffb6b8c 100644
--- a/arch/m68k/include/asm/fpu.h
+++ b/arch/m68k/include/asm/fpu.h
@@ -1,5 +1,21 @@
-#ifdef __uClinux__
-#include "fpu_no.h"
+#ifndef __M68K_FPU_H
+#define __M68K_FPU_H
+
+
+/*
+ * MAX floating point unit state size (FSAVE/FRESTORE)
+ */
+
+#if defined(CONFIG_M68020) || defined(CONFIG_M68030)
+#define FPSTATESIZE (216)
+#elif defined(CONFIG_M68040)
+#define FPSTATESIZE (96)
+#elif defined(CONFIG_M68KFPU_EMU)
+#define FPSTATESIZE (28)
+#elif defined(CONFIG_M68060)
+#define FPSTATESIZE (12)
 #else
-#include "fpu_mm.h"
+#define FPSTATESIZE (0)
 #endif
+
+#endif /* __M68K_FPU_H */
diff --git a/arch/m68k/include/asm/fpu_mm.h b/arch/m68k/include/asm/fpu_mm.h
deleted file mode 100644
index ffb6b8c..0000000
--- a/arch/m68k/include/asm/fpu_mm.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef __M68K_FPU_H
-#define __M68K_FPU_H
-
-
-/*
- * MAX floating point unit state size (FSAVE/FRESTORE)
- */
-
-#if defined(CONFIG_M68020) || defined(CONFIG_M68030)
-#define FPSTATESIZE (216)
-#elif defined(CONFIG_M68040)
-#define FPSTATESIZE (96)
-#elif defined(CONFIG_M68KFPU_EMU)
-#define FPSTATESIZE (28)
-#elif defined(CONFIG_M68060)
-#define FPSTATESIZE (12)
-#else
-#define FPSTATESIZE (0)
-#endif
-
-#endif /* __M68K_FPU_H */
diff --git a/arch/m68k/include/asm/fpu_no.h b/arch/m68k/include/asm/fpu_no.h
deleted file mode 100644
index b16b2e4..0000000
--- a/arch/m68k/include/asm/fpu_no.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef __M68KNOMMU_FPU_H
-#define __M68KNOMMU_FPU_H
-
-
-/*
- * MAX floating point unit state size (FSAVE/FRESTORE)
- */
-#if defined(CONFIG_M68020) || defined(CONFIG_M68030)
-#define FPSTATESIZE (216/sizeof(unsigned char))
-#elif defined(CONFIG_M68040)
-#define FPSTATESIZE (96/sizeof(unsigned char))
-#elif defined(CONFIG_M68KFPU_EMU)
-#define FPSTATESIZE (28/sizeof(unsigned char))
-#elif defined(CONFIG_M68060)
-#define FPSTATESIZE (12/sizeof(unsigned char))
-#else
-/* Assume no FP unit present then... */
-#define FPSTATESIZE (2) /* dummy size */
-#endif
-
-#endif /* __M68K_FPU_H */
diff --git a/arch/m68k/include/asm/hw_irq.h b/arch/m68k/include/asm/hw_irq.h
index e195260..eacef09 100644
--- a/arch/m68k/include/asm/hw_irq.h
+++ b/arch/m68k/include/asm/hw_irq.h
@@ -1,5 +1,6 @@
-#ifdef __uClinux__
-#include "hw_irq_no.h"
-#else
-#include "hw_irq_mm.h"
+#ifndef __ASM_M68K_HW_IRQ_H
+#define __ASM_M68K_HW_IRQ_H
+
+/* Dummy include. */
+
 #endif
diff --git a/arch/m68k/include/asm/hw_irq_mm.h b/arch/m68k/include/asm/hw_irq_mm.h
deleted file mode 100644
index eacef09..0000000
--- a/arch/m68k/include/asm/hw_irq_mm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_M68K_HW_IRQ_H
-#define __ASM_M68K_HW_IRQ_H
-
-/* Dummy include. */
-
-#endif
diff --git a/arch/m68k/include/asm/hw_irq_no.h b/arch/m68k/include/asm/hw_irq_no.h
deleted file mode 100644
index f3ec9e5..0000000
--- a/arch/m68k/include/asm/hw_irq_no.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __M68KNOMMU_HW_IRQ_H__
-#define __M68KNOMMU_HW_IRQ_H__
-
-#endif /* __M68KNOMMU_HW_IRQ_H__ */
diff --git a/arch/m68k/include/asm/kmap_types.h b/arch/m68k/include/asm/kmap_types.h
index 045d9fd..c843c63 100644
--- a/arch/m68k/include/asm/kmap_types.h
+++ b/arch/m68k/include/asm/kmap_types.h
@@ -1,5 +1,21 @@
-#ifdef __uClinux__
-#include "kmap_types_no.h"
-#else
-#include "kmap_types_mm.h"
-#endif
+#ifndef __ASM_M68K_KMAP_TYPES_H
+#define __ASM_M68K_KMAP_TYPES_H
+
+enum km_type {
+	KM_BOUNCE_READ,
+	KM_SKB_SUNRPC_DATA,
+	KM_SKB_DATA_SOFTIRQ,
+	KM_USER0,
+	KM_USER1,
+	KM_BIO_SRC_IRQ,
+	KM_BIO_DST_IRQ,
+	KM_PTE0,
+	KM_PTE1,
+	KM_IRQ0,
+	KM_IRQ1,
+	KM_SOFTIRQ0,
+	KM_SOFTIRQ1,
+	KM_TYPE_NR
+};
+
+#endif	/* __ASM_M68K_KMAP_TYPES_H */
diff --git a/arch/m68k/include/asm/kmap_types_mm.h b/arch/m68k/include/asm/kmap_types_mm.h
deleted file mode 100644
index c843c63..0000000
--- a/arch/m68k/include/asm/kmap_types_mm.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef __ASM_M68K_KMAP_TYPES_H
-#define __ASM_M68K_KMAP_TYPES_H
-
-enum km_type {
-	KM_BOUNCE_READ,
-	KM_SKB_SUNRPC_DATA,
-	KM_SKB_DATA_SOFTIRQ,
-	KM_USER0,
-	KM_USER1,
-	KM_BIO_SRC_IRQ,
-	KM_BIO_DST_IRQ,
-	KM_PTE0,
-	KM_PTE1,
-	KM_IRQ0,
-	KM_IRQ1,
-	KM_SOFTIRQ0,
-	KM_SOFTIRQ1,
-	KM_TYPE_NR
-};
-
-#endif	/* __ASM_M68K_KMAP_TYPES_H */
diff --git a/arch/m68k/include/asm/kmap_types_no.h b/arch/m68k/include/asm/kmap_types_no.h
deleted file mode 100644
index bfb6707..0000000
--- a/arch/m68k/include/asm/kmap_types_no.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef __ASM_M68K_KMAP_TYPES_H
-#define __ASM_M68K_KMAP_TYPES_H
-
-enum km_type {
-	KM_BOUNCE_READ,
-	KM_SKB_SUNRPC_DATA,
-	KM_SKB_DATA_SOFTIRQ,
-	KM_USER0,
-	KM_USER1,
-	KM_BIO_SRC_IRQ,
-	KM_BIO_DST_IRQ,
-	KM_PTE0,
-	KM_PTE1,
-	KM_IRQ0,
-	KM_IRQ1,
-	KM_SOFTIRQ0,
-	KM_SOFTIRQ1,
-	KM_TYPE_NR
-};
-
-#endif
diff --git a/arch/m68k/include/asm/m532xsim.h b/arch/m68k/include/asm/m532xsim.h
index 1835fd2..ce60345 100644
--- a/arch/m68k/include/asm/m532xsim.h
+++ b/arch/m68k/include/asm/m532xsim.h
@@ -16,6 +16,7 @@
 #define MCFINT_VECBASE      64
 #define MCFINT_UART0        26          /* Interrupt number for UART0 */
 #define MCFINT_UART1        27          /* Interrupt number for UART1 */
+#define MCFINT_UART2        28          /* Interrupt number for UART2 */
 
 #define MCF_WTM_WCR	MCF_REG16(0xFC098000)
 
diff --git a/arch/m68k/include/asm/mc146818rtc.h b/arch/m68k/include/asm/mc146818rtc.h
index fb90dcf..9f70a01 100644
--- a/arch/m68k/include/asm/mc146818rtc.h
+++ b/arch/m68k/include/asm/mc146818rtc.h
@@ -1,5 +1,26 @@
-#ifdef __uClinux__
-#include "mc146818rtc_no.h"
-#else
-#include "mc146818rtc_mm.h"
-#endif
+/*
+ * Machine dependent access functions for RTC registers.
+ */
+#ifndef _ASM_MC146818RTC_H
+#define _ASM_MC146818RTC_H
+
+
+#ifdef CONFIG_ATARI
+/* RTC in Atari machines */
+
+#include <asm/atarihw.h>
+
+#define RTC_PORT(x)	(TT_RTC_BAS + 2*(x))
+#define RTC_ALWAYS_BCD	0
+
+#define CMOS_READ(addr) ({ \
+atari_outb_p((addr),RTC_PORT(0)); \
+atari_inb_p(RTC_PORT(1)); \
+})
+#define CMOS_WRITE(val, addr) ({ \
+atari_outb_p((addr),RTC_PORT(0)); \
+atari_outb_p((val),RTC_PORT(1)); \
+})
+#endif /* CONFIG_ATARI */
+
+#endif /* _ASM_MC146818RTC_H */
diff --git a/arch/m68k/include/asm/mc146818rtc_mm.h b/arch/m68k/include/asm/mc146818rtc_mm.h
deleted file mode 100644
index 9f70a01..0000000
--- a/arch/m68k/include/asm/mc146818rtc_mm.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Machine dependent access functions for RTC registers.
- */
-#ifndef _ASM_MC146818RTC_H
-#define _ASM_MC146818RTC_H
-
-
-#ifdef CONFIG_ATARI
-/* RTC in Atari machines */
-
-#include <asm/atarihw.h>
-
-#define RTC_PORT(x)	(TT_RTC_BAS + 2*(x))
-#define RTC_ALWAYS_BCD	0
-
-#define CMOS_READ(addr) ({ \
-atari_outb_p((addr),RTC_PORT(0)); \
-atari_inb_p(RTC_PORT(1)); \
-})
-#define CMOS_WRITE(val, addr) ({ \
-atari_outb_p((addr),RTC_PORT(0)); \
-atari_outb_p((val),RTC_PORT(1)); \
-})
-#endif /* CONFIG_ATARI */
-
-#endif /* _ASM_MC146818RTC_H */
diff --git a/arch/m68k/include/asm/mc146818rtc_no.h b/arch/m68k/include/asm/mc146818rtc_no.h
deleted file mode 100644
index 907a0481..0000000
--- a/arch/m68k/include/asm/mc146818rtc_no.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Machine dependent access functions for RTC registers.
- */
-#ifndef _M68KNOMMU_MC146818RTC_H
-#define _M68KNOMMU_MC146818RTC_H
-
-/* empty include file to satisfy the include in genrtc.c/ide-geometry.c */
-
-#endif /* _M68KNOMMU_MC146818RTC_H */
diff --git a/arch/m68k/include/asm/mcfpci.h b/arch/m68k/include/asm/mcfpci.h
deleted file mode 100644
index f1507dd..0000000
--- a/arch/m68k/include/asm/mcfpci.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/****************************************************************************/
-
-/*
- *	mcfpci.h -- PCI bridge on ColdFire eval boards.
- *
- *	(C) Copyright 2000, Greg Ungerer (gerg@snapgear.com)
- *	(C) Copyright 2000, Lineo Inc. (www.lineo.com)
- */
-
-/****************************************************************************/
-#ifndef	mcfpci_h
-#define	mcfpci_h
-/****************************************************************************/
-
-
-#ifdef CONFIG_PCI
-
-/*
- *	Address regions in the PCI address space are not mapped into the
- *	normal memory space of the ColdFire. They must be accessed via
- *	handler routines. This is easy for I/O space (inb/outb/etc) but
- *	needs some code changes to support ordinary memory. Interrupts
- *	also need to be vectored through the PCI handler first, then it
- *	will call the actual driver sub-handlers.
- */
-
-/*
- *	Un-define all the standard I/O access routines.
- */
-#undef	inb
-#undef	inw
-#undef	inl
-#undef	inb_p
-#undef	inw_p
-#undef	insb
-#undef	insw
-#undef	insl
-#undef	outb
-#undef	outw
-#undef	outl
-#undef	outb_p
-#undef	outw_p
-#undef	outsb
-#undef	outsw
-#undef	outsl
-
-#undef	request_irq
-#undef	free_irq
-
-#undef	bus_to_virt
-#undef	virt_to_bus
-
-
-/*
- *	Re-direct all I/O memory accesses functions to PCI specific ones.
- */
-#define	inb	pci_inb
-#define	inw	pci_inw
-#define	inl	pci_inl
-#define	inb_p	pci_inb
-#define	inw_p	pci_inw
-#define	insb	pci_insb
-#define	insw	pci_insw
-#define	insl	pci_insl
-
-#define	outb	pci_outb
-#define	outw	pci_outw
-#define	outl	pci_outl
-#define	outb_p	pci_outb
-#define	outw_p	pci_outw
-#define	outsb	pci_outsb
-#define	outsw	pci_outsw
-#define	outsl	pci_outsl
-
-#define	request_irq	pci_request_irq
-#define	free_irq	pci_free_irq
-
-#define	virt_to_bus	pci_virt_to_bus
-#define	bus_to_virt	pci_bus_to_virt
-
-#define	CONFIG_COMEMPCI	1
-
-
-/*
- *	Prototypes of the real PCI functions (defined in bios32.c).
- */
-unsigned char	pci_inb(unsigned int addr);
-unsigned short	pci_inw(unsigned int addr);
-unsigned int	pci_inl(unsigned int addr);
-void		pci_insb(void *addr, void *buf, int len);
-void		pci_insw(void *addr, void *buf, int len);
-void		pci_insl(void *addr, void *buf, int len);
-
-void		pci_outb(unsigned char val, unsigned int addr);
-void		pci_outw(unsigned short val, unsigned int addr);
-void		pci_outl(unsigned int val, unsigned int addr);
-void		pci_outsb(void *addr, void *buf, int len);
-void		pci_outsw(void *addr, void *buf, int len);
-void		pci_outsl(void *addr, void *buf, int len);
-
-int		pci_request_irq(unsigned int irq,
-			void (*handler)(int, void *, struct pt_regs *),
-			unsigned long flags,
-			const char *device,
-			void *dev_id);
-void		pci_free_irq(unsigned int irq, void *dev_id);
-
-void		*pci_bmalloc(int size);
-void		pci_bmfree(void *bmp, int len);
-void		pci_copytoshmem(unsigned long bmp, void *src, int size);
-void		pci_copyfromshmem(void *dst, unsigned long bmp, int size);
-unsigned long	pci_virt_to_bus(volatile void *address);
-void		*pci_bus_to_virt(unsigned long address);
-void		pci_bmcpyto(void *dst, void *src, int len);
-void		pci_bmcpyfrom(void *dst, void *src, int len);
-
-#endif /* CONFIG_PCI */
-/****************************************************************************/
-#endif	/* mcfpci_h */
diff --git a/arch/m68k/include/asm/mmu.h b/arch/m68k/include/asm/mmu.h
index a81d394..8a11a63 100644
--- a/arch/m68k/include/asm/mmu.h
+++ b/arch/m68k/include/asm/mmu.h
@@ -1,5 +1,13 @@
-#ifdef __uClinux__
-#include "mmu_no.h"
+#ifndef __MMU_H
+#define __MMU_H
+
+#ifdef CONFIG_MMU
+/* Default "unsigned long" context */
+typedef unsigned long mm_context_t;
 #else
-#include "mmu_mm.h"
+typedef struct {
+	unsigned long		end_brk;
+} mm_context_t;
+#endif
+
 #endif
diff --git a/arch/m68k/include/asm/mmu_context.h b/arch/m68k/include/asm/mmu_context.h
index b440928..7d4341e 100644
--- a/arch/m68k/include/asm/mmu_context.h
+++ b/arch/m68k/include/asm/mmu_context.h
@@ -1,5 +1,175 @@
-#ifdef __uClinux__
-#include "mmu_context_no.h"
+#ifndef __M68K_MMU_CONTEXT_H
+#define __M68K_MMU_CONTEXT_H
+
+#include <asm-generic/mm_hooks.h>
+
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+}
+
+#ifdef CONFIG_MMU
+#ifndef CONFIG_SUN3
+
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+
+static inline int init_new_context(struct task_struct *tsk,
+				   struct mm_struct *mm)
+{
+	mm->context = virt_to_phys(mm->pgd);
+	return 0;
+}
+
+#define destroy_context(mm)		do { } while(0)
+
+static inline void switch_mm_0230(struct mm_struct *mm)
+{
+	unsigned long crp[2] = {
+		0x80000000 | _PAGE_TABLE, mm->context
+	};
+	unsigned long tmp;
+
+	asm volatile (".chip 68030");
+
+	/* flush MC68030/MC68020 caches (they are virtually addressed) */
+	asm volatile (
+		"movec %%cacr,%0;"
+		"orw %1,%0; "
+		"movec %0,%%cacr"
+		: "=d" (tmp) : "di" (FLUSH_I_AND_D));
+
+	/* Switch the root pointer. For a 030-only kernel,
+	 * avoid flushing the whole ATC, we only need to
+	 * flush the user entries. The 68851 does this by
+	 * itself. Avoid a runtime check here.
+	 */
+	asm volatile (
+#ifdef CPU_M68030_ONLY
+		"pmovefd %0,%%crp; "
+		"pflush #0,#4"
 #else
-#include "mmu_context_mm.h"
+		"pmove %0,%%crp"
 #endif
+		: : "m" (crp[0]));
+
+	asm volatile (".chip 68k");
+}
+
+static inline void switch_mm_0460(struct mm_struct *mm)
+{
+	asm volatile (".chip 68040");
+
+	/* flush address translation cache (user entries) */
+	asm volatile ("pflushan");
+
+	/* switch the root pointer */
+	asm volatile ("movec %0,%%urp" : : "r" (mm->context));
+
+	if (CPU_IS_060) {
+		unsigned long tmp;
+
+		/* clear user entries in the branch cache */
+		asm volatile (
+			"movec %%cacr,%0; "
+		        "orl %1,%0; "
+		        "movec %0,%%cacr"
+			: "=d" (tmp): "di" (0x00200000));
+	}
+
+	asm volatile (".chip 68k");
+}
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
+{
+	if (prev != next) {
+		if (CPU_IS_020_OR_030)
+			switch_mm_0230(next);
+		else
+			switch_mm_0460(next);
+	}
+}
+
+#define deactivate_mm(tsk,mm)	do { } while (0)
+
+static inline void activate_mm(struct mm_struct *prev_mm,
+			       struct mm_struct *next_mm)
+{
+	next_mm->context = virt_to_phys(next_mm->pgd);
+
+	if (CPU_IS_020_OR_030)
+		switch_mm_0230(next_mm);
+	else
+		switch_mm_0460(next_mm);
+}
+
+#else  /* CONFIG_SUN3 */
+#include <asm/sun3mmu.h>
+#include <linux/sched.h>
+
+extern unsigned long get_free_context(struct mm_struct *mm);
+extern void clear_context(unsigned long context);
+
+/* set the context for a new task to unmapped */
+static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	mm->context = SUN3_INVALID_CONTEXT;
+	return 0;
+}
+
+/* find the context given to this process, and if it hasn't already
+   got one, go get one for it. */
+static inline void get_mmu_context(struct mm_struct *mm)
+{
+	if(mm->context == SUN3_INVALID_CONTEXT)
+		mm->context = get_free_context(mm);
+}
+
+/* flush context if allocated... */
+static inline void destroy_context(struct mm_struct *mm)
+{
+	if(mm->context != SUN3_INVALID_CONTEXT)
+		clear_context(mm->context);
+}
+
+static inline void activate_context(struct mm_struct *mm)
+{
+	get_mmu_context(mm);
+	sun3_put_context(mm->context);
+}
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
+{
+	activate_context(tsk->mm);
+}
+
+#define deactivate_mm(tsk,mm)	do { } while (0)
+
+static inline void activate_mm(struct mm_struct *prev_mm,
+			       struct mm_struct *next_mm)
+{
+	activate_context(next_mm);
+}
+
+#endif
+#else /* !CONFIG_MMU */
+
+static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	return 0;
+}
+
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
+{
+}
+
+#define destroy_context(mm)	do { } while (0)
+#define deactivate_mm(tsk,mm)	do { } while (0)
+
+static inline void activate_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm)
+{
+}
+
+#endif /* CONFIG_MMU */
+#endif /* __M68K_MMU_CONTEXT_H */
diff --git a/arch/m68k/include/asm/mmu_context_mm.h b/arch/m68k/include/asm/mmu_context_mm.h
deleted file mode 100644
index 894dacb..0000000
--- a/arch/m68k/include/asm/mmu_context_mm.h
+++ /dev/null
@@ -1,154 +0,0 @@
-#ifndef __M68K_MMU_CONTEXT_H
-#define __M68K_MMU_CONTEXT_H
-
-#include <asm-generic/mm_hooks.h>
-
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-}
-
-#ifndef CONFIG_SUN3
-
-#include <asm/setup.h>
-#include <asm/page.h>
-#include <asm/pgalloc.h>
-
-static inline int init_new_context(struct task_struct *tsk,
-				   struct mm_struct *mm)
-{
-	mm->context = virt_to_phys(mm->pgd);
-	return 0;
-}
-
-#define destroy_context(mm)		do { } while(0)
-
-static inline void switch_mm_0230(struct mm_struct *mm)
-{
-	unsigned long crp[2] = {
-		0x80000000 | _PAGE_TABLE, mm->context
-	};
-	unsigned long tmp;
-
-	asm volatile (".chip 68030");
-
-	/* flush MC68030/MC68020 caches (they are virtually addressed) */
-	asm volatile (
-		"movec %%cacr,%0;"
-		"orw %1,%0; "
-		"movec %0,%%cacr"
-		: "=d" (tmp) : "di" (FLUSH_I_AND_D));
-
-	/* Switch the root pointer. For a 030-only kernel,
-	 * avoid flushing the whole ATC, we only need to
-	 * flush the user entries. The 68851 does this by
-	 * itself. Avoid a runtime check here.
-	 */
-	asm volatile (
-#ifdef CPU_M68030_ONLY
-		"pmovefd %0,%%crp; "
-		"pflush #0,#4"
-#else
-		"pmove %0,%%crp"
-#endif
-		: : "m" (crp[0]));
-
-	asm volatile (".chip 68k");
-}
-
-static inline void switch_mm_0460(struct mm_struct *mm)
-{
-	asm volatile (".chip 68040");
-
-	/* flush address translation cache (user entries) */
-	asm volatile ("pflushan");
-
-	/* switch the root pointer */
-	asm volatile ("movec %0,%%urp" : : "r" (mm->context));
-
-	if (CPU_IS_060) {
-		unsigned long tmp;
-
-		/* clear user entries in the branch cache */
-		asm volatile (
-			"movec %%cacr,%0; "
-		        "orl %1,%0; "
-		        "movec %0,%%cacr"
-			: "=d" (tmp): "di" (0x00200000));
-	}
-
-	asm volatile (".chip 68k");
-}
-
-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
-{
-	if (prev != next) {
-		if (CPU_IS_020_OR_030)
-			switch_mm_0230(next);
-		else
-			switch_mm_0460(next);
-	}
-}
-
-#define deactivate_mm(tsk,mm)	do { } while (0)
-
-static inline void activate_mm(struct mm_struct *prev_mm,
-			       struct mm_struct *next_mm)
-{
-	next_mm->context = virt_to_phys(next_mm->pgd);
-
-	if (CPU_IS_020_OR_030)
-		switch_mm_0230(next_mm);
-	else
-		switch_mm_0460(next_mm);
-}
-
-#else  /* CONFIG_SUN3 */
-#include <asm/sun3mmu.h>
-#include <linux/sched.h>
-
-extern unsigned long get_free_context(struct mm_struct *mm);
-extern void clear_context(unsigned long context);
-
-/* set the context for a new task to unmapped */
-static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-{
-	mm->context = SUN3_INVALID_CONTEXT;
-	return 0;
-}
-
-/* find the context given to this process, and if it hasn't already
-   got one, go get one for it. */
-static inline void get_mmu_context(struct mm_struct *mm)
-{
-	if(mm->context == SUN3_INVALID_CONTEXT)
-		mm->context = get_free_context(mm);
-}
-
-/* flush context if allocated... */
-static inline void destroy_context(struct mm_struct *mm)
-{
-	if(mm->context != SUN3_INVALID_CONTEXT)
-		clear_context(mm->context);
-}
-
-static inline void activate_context(struct mm_struct *mm)
-{
-	get_mmu_context(mm);
-	sun3_put_context(mm->context);
-}
-
-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
-{
-	activate_context(tsk->mm);
-}
-
-#define deactivate_mm(tsk,mm)	do { } while (0)
-
-static inline void activate_mm(struct mm_struct *prev_mm,
-			       struct mm_struct *next_mm)
-{
-	activate_context(next_mm);
-}
-
-#endif
-#endif
diff --git a/arch/m68k/include/asm/mmu_context_no.h b/arch/m68k/include/asm/mmu_context_no.h
deleted file mode 100644
index 9ccee42..0000000
--- a/arch/m68k/include/asm/mmu_context_no.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef __M68KNOMMU_MMU_CONTEXT_H
-#define __M68KNOMMU_MMU_CONTEXT_H
-
-#include <asm/setup.h>
-#include <asm/page.h>
-#include <asm/pgalloc.h>
-#include <asm-generic/mm_hooks.h>
-
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-}
-
-static inline int
-init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-{
-	// mm->context = virt_to_phys(mm->pgd);
-	return(0);
-}
-
-#define destroy_context(mm)		do { } while(0)
-
-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
-{
-}
-
-#define deactivate_mm(tsk,mm)	do { } while (0)
-
-static inline void activate_mm(struct mm_struct *prev_mm,
-			       struct mm_struct *next_mm)
-{
-}
-
-#endif
diff --git a/arch/m68k/include/asm/mmu_mm.h b/arch/m68k/include/asm/mmu_mm.h
deleted file mode 100644
index ccd36d2..0000000
--- a/arch/m68k/include/asm/mmu_mm.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __MMU_H
-#define __MMU_H
-
-/* Default "unsigned long" context */
-typedef unsigned long mm_context_t;
-
-#endif
diff --git a/arch/m68k/include/asm/mmu_no.h b/arch/m68k/include/asm/mmu_no.h
deleted file mode 100644
index e2da1e6..0000000
--- a/arch/m68k/include/asm/mmu_no.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef __M68KNOMMU_MMU_H
-#define __M68KNOMMU_MMU_H
-
-/* Copyright (C) 2002, David McCullough <davidm@snapgear.com> */
-
-typedef struct {
-	unsigned long		end_brk;
-} mm_context_t;
-
-#endif /* __M68KNOMMU_MMU_H */
diff --git a/arch/m68k/include/asm/module.h b/arch/m68k/include/asm/module.h
index 79b59d1..5f21e11 100644
--- a/arch/m68k/include/asm/module.h
+++ b/arch/m68k/include/asm/module.h
@@ -1,5 +1,48 @@
-#ifdef __uClinux__
-#include "module_no.h"
+#ifndef _ASM_M68K_MODULE_H
+#define _ASM_M68K_MODULE_H
+
+#ifdef CONFIG_MMU
+
+struct mod_arch_specific {
+	struct m68k_fixup_info *fixup_start, *fixup_end;
+};
+
+#define MODULE_ARCH_INIT {				\
+	.fixup_start		= __start_fixup,	\
+	.fixup_end		= __stop_fixup,		\
+}
+
+
+enum m68k_fixup_type {
+	m68k_fixup_memoffset,
+	m68k_fixup_vnode_shift,
+};
+
+struct m68k_fixup_info {
+	enum m68k_fixup_type type;
+	void *addr;
+};
+
+#define m68k_fixup(type, addr)			\
+	"	.section \".m68k_fixup\",\"aw\"\n"	\
+	"	.long " #type "," #addr "\n"	\
+	"	.previous\n"
+
+extern struct m68k_fixup_info __start_fixup[], __stop_fixup[];
+
+struct module;
+extern void module_fixup(struct module *mod, struct m68k_fixup_info *start,
+			 struct m68k_fixup_info *end);
+
 #else
-#include "module_mm.h"
-#endif
+
+struct mod_arch_specific {
+};
+
+#endif /* CONFIG_MMU */
+
+#define Elf_Shdr Elf32_Shdr
+#define Elf_Sym Elf32_Sym
+#define Elf_Ehdr Elf32_Ehdr
+
+#endif /* _ASM_M68K_MODULE_H */
diff --git a/arch/m68k/include/asm/module_mm.h b/arch/m68k/include/asm/module_mm.h
deleted file mode 100644
index 382d20a..0000000
--- a/arch/m68k/include/asm/module_mm.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef _ASM_M68K_MODULE_H
-#define _ASM_M68K_MODULE_H
-
-struct mod_arch_specific {
-	struct m68k_fixup_info *fixup_start, *fixup_end;
-};
-
-#define MODULE_ARCH_INIT {				\
-	.fixup_start		= __start_fixup,	\
-	.fixup_end		= __stop_fixup,		\
-}
-
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
-
-
-enum m68k_fixup_type {
-	m68k_fixup_memoffset,
-	m68k_fixup_vnode_shift,
-};
-
-struct m68k_fixup_info {
-	enum m68k_fixup_type type;
-	void *addr;
-};
-
-#define m68k_fixup(type, addr)			\
-	"	.section \".m68k_fixup\",\"aw\"\n"	\
-	"	.long " #type "," #addr "\n"	\
-	"	.previous\n"
-
-extern struct m68k_fixup_info __start_fixup[], __stop_fixup[];
-
-struct module;
-extern void module_fixup(struct module *mod, struct m68k_fixup_info *start,
-			 struct m68k_fixup_info *end);
-
-#endif /* _ASM_M68K_MODULE_H */
diff --git a/arch/m68k/include/asm/module_no.h b/arch/m68k/include/asm/module_no.h
deleted file mode 100644
index 2e45ab5..0000000
--- a/arch/m68k/include/asm/module_no.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef ASM_M68KNOMMU_MODULE_H
-#define ASM_M68KNOMMU_MODULE_H
-
-struct mod_arch_specific {
-};
-
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
-
-#endif /* ASM_M68KNOMMU_MODULE_H */
diff --git a/arch/m68k/include/asm/page_offset.h b/arch/m68k/include/asm/page_offset.h
index 66455c8..1780152 100644
--- a/arch/m68k/include/asm/page_offset.h
+++ b/arch/m68k/include/asm/page_offset.h
@@ -1,5 +1,11 @@
-#ifdef __uClinux__
-#include "page_offset_no.h"
+/* This handles the memory map.. */
+
+#ifdef CONFIG_MMU
+#ifndef CONFIG_SUN3
+#define PAGE_OFFSET_RAW		0x00000000
 #else
-#include "page_offset_mm.h"
+#define PAGE_OFFSET_RAW		0x0E000000
+#endif
+#else
+#define	PAGE_OFFSET_RAW		CONFIG_RAMBASE
 #endif
diff --git a/arch/m68k/include/asm/page_offset_mm.h b/arch/m68k/include/asm/page_offset_mm.h
deleted file mode 100644
index 1cbdb7f..0000000
--- a/arch/m68k/include/asm/page_offset_mm.h
+++ /dev/null
@@ -1,8 +0,0 @@
-
-/* This handles the memory map.. */
-#ifndef CONFIG_SUN3
-#define PAGE_OFFSET_RAW		0x00000000
-#else
-#define PAGE_OFFSET_RAW		0x0E000000
-#endif
-
diff --git a/arch/m68k/include/asm/page_offset_no.h b/arch/m68k/include/asm/page_offset_no.h
deleted file mode 100644
index d4e73e0..0000000
--- a/arch/m68k/include/asm/page_offset_no.h
+++ /dev/null
@@ -1,5 +0,0 @@
-
-
-/* This handles the memory map.. */
-#define	PAGE_OFFSET_RAW		CONFIG_RAMBASE
-
diff --git a/arch/m68k/include/asm/pci.h b/arch/m68k/include/asm/pci.h
index dbea953..4ad0aea 100644
--- a/arch/m68k/include/asm/pci.h
+++ b/arch/m68k/include/asm/pci.h
@@ -1,5 +1,12 @@
-#ifdef __uClinux__
-#include "pci_no.h"
-#else
-#include "pci_mm.h"
-#endif
+#ifndef _ASM_M68K_PCI_H
+#define _ASM_M68K_PCI_H
+
+#include <asm-generic/pci-dma-compat.h>
+
+/* The PCI address space does equal the physical memory
+ * address space.  The networking and block device layers use
+ * this boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS	(1)
+
+#endif /* _ASM_M68K_PCI_H */
diff --git a/arch/m68k/include/asm/pci_mm.h b/arch/m68k/include/asm/pci_mm.h
deleted file mode 100644
index 4ad0aea..0000000
--- a/arch/m68k/include/asm/pci_mm.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASM_M68K_PCI_H
-#define _ASM_M68K_PCI_H
-
-#include <asm-generic/pci-dma-compat.h>
-
-/* The PCI address space does equal the physical memory
- * address space.  The networking and block device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS	(1)
-
-#endif /* _ASM_M68K_PCI_H */
diff --git a/arch/m68k/include/asm/pci_no.h b/arch/m68k/include/asm/pci_no.h
deleted file mode 100644
index 9abbc03..0000000
--- a/arch/m68k/include/asm/pci_no.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef M68KNOMMU_PCI_H
-#define	M68KNOMMU_PCI_H
-
-#include <asm/pci_mm.h>
-
-#ifdef CONFIG_COMEMPCI
-/*
- *	These are pretty much arbitary with the CoMEM implementation.
- *	We have the whole address space to ourselves.
- */
-#define PCIBIOS_MIN_IO		0x100
-#define PCIBIOS_MIN_MEM		0x00010000
-
-#define pcibios_scan_all_fns(a, b)	0
-
-/*
- * Return whether the given PCI device DMA address mask can
- * be supported properly.  For example, if your device can
- * only drive the low 24-bits during PCI bus mastering, then
- * you would pass 0x00ffffff as the mask to this function.
- */
-static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask)
-{
-	return 1;
-}
-
-#endif /* CONFIG_COMEMPCI */
-
-#endif /* M68KNOMMU_PCI_H */
diff --git a/arch/m68k/include/asm/pgalloc.h b/arch/m68k/include/asm/pgalloc.h
index 059cb73..c294aad 100644
--- a/arch/m68k/include/asm/pgalloc.h
+++ b/arch/m68k/include/asm/pgalloc.h
@@ -1,5 +1,19 @@
-#ifdef __uClinux__
-#include "pgalloc_no.h"
+#ifndef M68K_PGALLOC_H
+#define M68K_PGALLOC_H
+
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <asm/setup.h>
+
+#ifdef CONFIG_MMU
+#include <asm/virtconvert.h>
+#ifdef CONFIG_SUN3
+#include <asm/sun3_pgalloc.h>
 #else
-#include "pgalloc_mm.h"
+#include <asm/motorola_pgalloc.h>
 #endif
+
+extern void m68k_setup_node(int node);
+#endif
+
+#endif /* M68K_PGALLOC_H */
diff --git a/arch/m68k/include/asm/pgalloc_mm.h b/arch/m68k/include/asm/pgalloc_mm.h
deleted file mode 100644
index 4cb1a57..0000000
--- a/arch/m68k/include/asm/pgalloc_mm.h
+++ /dev/null
@@ -1,19 +0,0 @@
-
-#ifndef M68K_PGALLOC_H
-#define M68K_PGALLOC_H
-
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <asm/setup.h>
-#include <asm/virtconvert.h>
-
-
-#ifdef CONFIG_SUN3
-#include <asm/sun3_pgalloc.h>
-#else
-#include <asm/motorola_pgalloc.h>
-#endif
-
-extern void m68k_setup_node(int node);
-
-#endif /* M68K_PGALLOC_H */
diff --git a/arch/m68k/include/asm/pgalloc_no.h b/arch/m68k/include/asm/pgalloc_no.h
deleted file mode 100644
index d6352f6..0000000
--- a/arch/m68k/include/asm/pgalloc_no.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _M68KNOMMU_PGALLOC_H
-#define _M68KNOMMU_PGALLOC_H
-
-#include <asm/setup.h>
-
-#define check_pgt_cache()	do { } while (0)
-
-#endif /* _M68KNOMMU_PGALLOC_H */
diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h
index 4625101..bf86b29 100644
--- a/arch/m68k/include/asm/pgtable_no.h
+++ b/arch/m68k/include/asm/pgtable_no.h
@@ -67,4 +67,6 @@ extern unsigned int kobjsize(const void *objp);
 
 #include <asm-generic/pgtable.h>
 
+#define check_pgt_cache()	do { } while (0)
+
 #endif /* _M68KNOMMU_PGTABLE_H */
diff --git a/arch/m68k/include/asm/rtc.h b/arch/m68k/include/asm/rtc.h
index 5d3e038..a4d08ea 100644
--- a/arch/m68k/include/asm/rtc.h
+++ b/arch/m68k/include/asm/rtc.h
@@ -36,13 +36,16 @@ static inline unsigned int get_rtc_time(struct rtc_time *time)
 	 * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated
 	 * by the RTC when initially set to a non-zero value.
 	 */
-	mach_hwclk(0, time);
+	if (mach_hwclk)
+		mach_hwclk(0, time);
 	return RTC_24H;
 }
 
 static inline int set_rtc_time(struct rtc_time *time)
 {
-	return mach_hwclk(1, time);
+	if (mach_hwclk)
+		return mach_hwclk(1, time);
+	return -EINVAL;
 }
 
 static inline unsigned int get_rtc_ss(void)
diff --git a/arch/m68k/include/asm/scatterlist.h b/arch/m68k/include/asm/scatterlist.h
index b7e5286..e27ad90 100644
--- a/arch/m68k/include/asm/scatterlist.h
+++ b/arch/m68k/include/asm/scatterlist.h
@@ -1,5 +1,23 @@
-#ifdef __uClinux__
-#include "scatterlist_no.h"
-#else
-#include "scatterlist_mm.h"
+#ifndef _M68K_SCATTERLIST_H
+#define _M68K_SCATTERLIST_H
+
+#include <linux/types.h>
+
+struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+	unsigned long sg_magic;
 #endif
+	unsigned long page_link;
+	unsigned int offset;
+	unsigned int length;
+
+	dma_addr_t dma_address;	/* A place to hang host-specific addresses at. */
+};
+
+/* This is bogus and should go away. */
+#define ISA_DMA_THRESHOLD (0x00ffffff)
+
+#define sg_dma_address(sg)	((sg)->dma_address)
+#define sg_dma_len(sg)		((sg)->length)
+
+#endif /* !(_M68K_SCATTERLIST_H) */
diff --git a/arch/m68k/include/asm/scatterlist_mm.h b/arch/m68k/include/asm/scatterlist_mm.h
deleted file mode 100644
index d3a7a0e..0000000
--- a/arch/m68k/include/asm/scatterlist_mm.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef _M68K_SCATTERLIST_H
-#define _M68K_SCATTERLIST_H
-
-#include <linux/types.h>
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-	unsigned long sg_magic;
-#endif
-	unsigned long page_link;
-	unsigned int offset;
-	unsigned int length;
-
-	__u32 dma_address;	/* A place to hang host-specific addresses at. */
-};
-
-/* This is bogus and should go away. */
-#define ISA_DMA_THRESHOLD (0x00ffffff)
-
-#define sg_dma_address(sg)	((sg)->dma_address)
-#define sg_dma_len(sg)		((sg)->length)
-
-#endif /* !(_M68K_SCATTERLIST_H) */
diff --git a/arch/m68k/include/asm/scatterlist_no.h b/arch/m68k/include/asm/scatterlist_no.h
deleted file mode 100644
index afc4788..0000000
--- a/arch/m68k/include/asm/scatterlist_no.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef _M68KNOMMU_SCATTERLIST_H
-#define _M68KNOMMU_SCATTERLIST_H
-
-#include <linux/mm.h>
-#include <asm/types.h>
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-	unsigned long	sg_magic;
-#endif
-	unsigned long	page_link;
-	unsigned int	offset;
-	dma_addr_t	dma_address;
-	unsigned int	length;
-};
-
-#define sg_dma_address(sg)      ((sg)->dma_address)
-#define sg_dma_len(sg)          ((sg)->length)
-
-#define ISA_DMA_THRESHOLD	(0xffffffff)
-
-#endif /* !(_M68KNOMMU_SCATTERLIST_H) */
diff --git a/arch/m68k/include/asm/segment.h b/arch/m68k/include/asm/segment.h
index 82583bc..ee95921 100644
--- a/arch/m68k/include/asm/segment.h
+++ b/arch/m68k/include/asm/segment.h
@@ -1,5 +1,63 @@
-#ifdef __uClinux__
-#include "segment_no.h"
+#ifndef _M68K_SEGMENT_H
+#define _M68K_SEGMENT_H
+
+/* define constants */
+/* Address spaces (FC0-FC2) */
+#define USER_DATA     (1)
+#ifndef __USER_DS
+#define __USER_DS     (USER_DATA)
+#endif
+#define USER_PROGRAM  (2)
+#define SUPER_DATA    (5)
+#ifndef __KERNEL_DS
+#define __KERNEL_DS   (SUPER_DATA)
+#endif
+#define SUPER_PROGRAM (6)
+#define CPU_SPACE     (7)
+
+#ifndef __ASSEMBLY__
+
+typedef struct {
+	unsigned long seg;
+} mm_segment_t;
+
+#define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })
+#define USER_DS		MAKE_MM_SEG(__USER_DS)
+#define KERNEL_DS	MAKE_MM_SEG(__KERNEL_DS)
+
+/*
+ * Get/set the SFC/DFC registers for MOVES instructions
+ */
+
+static inline mm_segment_t get_fs(void)
+{
+#ifdef CONFIG_MMU
+	mm_segment_t _v;
+	__asm__ ("movec %/dfc,%0":"=r" (_v.seg):);
+
+	return _v;
 #else
-#include "segment_mm.h"
+	return USER_DS;
+#endif
+}
+
+static inline mm_segment_t get_ds(void)
+{
+    /* return the supervisor data space code */
+    return KERNEL_DS;
+}
+
+static inline void set_fs(mm_segment_t val)
+{
+#ifdef CONFIG_MMU
+	__asm__ __volatile__ ("movec %0,%/sfc\n\t"
+			      "movec %0,%/dfc\n\t"
+			      : /* no outputs */ : "r" (val.seg) : "memory");
 #endif
+}
+
+#define segment_eq(a,b)	((a).seg == (b).seg)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _M68K_SEGMENT_H */
diff --git a/arch/m68k/include/asm/segment_mm.h b/arch/m68k/include/asm/segment_mm.h
deleted file mode 100644
index 7b0b2d3..0000000
--- a/arch/m68k/include/asm/segment_mm.h
+++ /dev/null
@@ -1,57 +0,0 @@
-#ifndef _M68K_SEGMENT_H
-#define _M68K_SEGMENT_H
-
-/* define constants */
-/* Address spaces (FC0-FC2) */
-#define USER_DATA     (1)
-#ifndef __USER_DS
-#define __USER_DS     (USER_DATA)
-#endif
-#define USER_PROGRAM  (2)
-#define SUPER_DATA    (5)
-#ifndef __KERNEL_DS
-#define __KERNEL_DS   (SUPER_DATA)
-#endif
-#define SUPER_PROGRAM (6)
-#define CPU_SPACE     (7)
-
-#ifndef __ASSEMBLY__
-
-typedef struct {
-	unsigned long seg;
-} mm_segment_t;
-
-#define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })
-#define USER_DS		MAKE_MM_SEG(__USER_DS)
-#define KERNEL_DS	MAKE_MM_SEG(__KERNEL_DS)
-
-/*
- * Get/set the SFC/DFC registers for MOVES instructions
- */
-
-static inline mm_segment_t get_fs(void)
-{
-	mm_segment_t _v;
-	__asm__ ("movec %/dfc,%0":"=r" (_v.seg):);
-
-	return _v;
-}
-
-static inline mm_segment_t get_ds(void)
-{
-    /* return the supervisor data space code */
-    return KERNEL_DS;
-}
-
-static inline void set_fs(mm_segment_t val)
-{
-	__asm__ __volatile__ ("movec %0,%/sfc\n\t"
-			      "movec %0,%/dfc\n\t"
-			      : /* no outputs */ : "r" (val.seg) : "memory");
-}
-
-#define segment_eq(a,b)	((a).seg == (b).seg)
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _M68K_SEGMENT_H */
diff --git a/arch/m68k/include/asm/segment_no.h b/arch/m68k/include/asm/segment_no.h
deleted file mode 100644
index 42318eb..0000000
--- a/arch/m68k/include/asm/segment_no.h
+++ /dev/null
@@ -1,51 +0,0 @@
-#ifndef _M68K_SEGMENT_H
-#define _M68K_SEGMENT_H
-
-/* define constants */
-/* Address spaces (FC0-FC2) */
-#define USER_DATA     (1)
-#ifndef __USER_DS
-#define __USER_DS     (USER_DATA)
-#endif
-#define USER_PROGRAM  (2)
-#define SUPER_DATA    (5)
-#ifndef __KERNEL_DS
-#define __KERNEL_DS   (SUPER_DATA)
-#endif
-#define SUPER_PROGRAM (6)
-#define CPU_SPACE     (7)
-
-#ifndef __ASSEMBLY__
-
-typedef struct {
-	unsigned long seg;
-} mm_segment_t;
-
-#define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })
-#define USER_DS		MAKE_MM_SEG(__USER_DS)
-#define KERNEL_DS	MAKE_MM_SEG(__KERNEL_DS)
-
-/*
- * Get/set the SFC/DFC registers for MOVES instructions
- */
-
-static inline mm_segment_t get_fs(void)
-{
-    return USER_DS;
-}
-
-static inline mm_segment_t get_ds(void)
-{
-    /* return the supervisor data space code */
-    return KERNEL_DS;
-}
-
-static inline void set_fs(mm_segment_t val)
-{
-}
-
-#define segment_eq(a,b)	((a).seg == (b).seg)
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _M68K_SEGMENT_H */
diff --git a/arch/m68k/include/asm/timex.h b/arch/m68k/include/asm/timex.h
index 7197629..b87f2f2 100644
--- a/arch/m68k/include/asm/timex.h
+++ b/arch/m68k/include/asm/timex.h
@@ -1,5 +1,18 @@
-#ifdef __uClinux__
-#include "timex_no.h"
-#else
-#include "timex_mm.h"
+/*
+ * linux/include/asm-m68k/timex.h
+ *
+ * m68k architecture timex specifications
+ */
+#ifndef _ASMm68k_TIMEX_H
+#define _ASMm68k_TIMEX_H
+
+#define CLOCK_TICK_RATE	1193180 /* Underlying HZ */
+
+typedef unsigned long cycles_t;
+
+static inline cycles_t get_cycles(void)
+{
+	return 0;
+}
+
 #endif
diff --git a/arch/m68k/include/asm/timex_mm.h b/arch/m68k/include/asm/timex_mm.h
deleted file mode 100644
index b87f2f2..0000000
--- a/arch/m68k/include/asm/timex_mm.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * linux/include/asm-m68k/timex.h
- *
- * m68k architecture timex specifications
- */
-#ifndef _ASMm68k_TIMEX_H
-#define _ASMm68k_TIMEX_H
-
-#define CLOCK_TICK_RATE	1193180 /* Underlying HZ */
-
-typedef unsigned long cycles_t;
-
-static inline cycles_t get_cycles(void)
-{
-	return 0;
-}
-
-#endif
diff --git a/arch/m68k/include/asm/timex_no.h b/arch/m68k/include/asm/timex_no.h
deleted file mode 100644
index 109050f..0000000
--- a/arch/m68k/include/asm/timex_no.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * linux/include/asm-m68knommu/timex.h
- *
- * m68knommu architecture timex specifications
- */
-#ifndef _ASM_M68KNOMMU_TIMEX_H
-#define _ASM_M68KNOMMU_TIMEX_H
-
-#ifdef CONFIG_COLDFIRE
-#include <asm/coldfire.h>
-#define CLOCK_TICK_RATE	MCF_CLK
-#else
-#define CLOCK_TICK_RATE	1193180 /* Underlying HZ */
-#endif
-
-typedef unsigned long cycles_t;
-
-static inline cycles_t get_cycles(void)
-{
-	return 0;
-}
-
-#endif
diff --git a/arch/m68k/include/asm/tlbflush.h b/arch/m68k/include/asm/tlbflush.h
index b6f93b3..a6b4ed4 100644
--- a/arch/m68k/include/asm/tlbflush.h
+++ b/arch/m68k/include/asm/tlbflush.h
@@ -1,5 +1,267 @@
-#ifdef __uClinux__
-#include "tlbflush_no.h"
+#ifndef _M68K_TLBFLUSH_H
+#define _M68K_TLBFLUSH_H
+
+#ifdef CONFIG_MMU
+#ifndef CONFIG_SUN3
+
+#include <asm/current.h>
+
+static inline void flush_tlb_kernel_page(void *addr)
+{
+	if (CPU_IS_040_OR_060) {
+		mm_segment_t old_fs = get_fs();
+		set_fs(KERNEL_DS);
+		__asm__ __volatile__(".chip 68040\n\t"
+				     "pflush (%0)\n\t"
+				     ".chip 68k"
+				     : : "a" (addr));
+		set_fs(old_fs);
+	} else if (CPU_IS_020_OR_030)
+		__asm__ __volatile__("pflush #4,#4,(%0)" : : "a" (addr));
+}
+
+/*
+ * flush all user-space atc entries.
+ */
+static inline void __flush_tlb(void)
+{
+	if (CPU_IS_040_OR_060)
+		__asm__ __volatile__(".chip 68040\n\t"
+				     "pflushan\n\t"
+				     ".chip 68k");
+	else if (CPU_IS_020_OR_030)
+		__asm__ __volatile__("pflush #0,#4");
+}
+
+static inline void __flush_tlb040_one(unsigned long addr)
+{
+	__asm__ __volatile__(".chip 68040\n\t"
+			     "pflush (%0)\n\t"
+			     ".chip 68k"
+			     : : "a" (addr));
+}
+
+static inline void __flush_tlb_one(unsigned long addr)
+{
+	if (CPU_IS_040_OR_060)
+		__flush_tlb040_one(addr);
+	else if (CPU_IS_020_OR_030)
+		__asm__ __volatile__("pflush #0,#4,(%0)" : : "a" (addr));
+}
+
+#define flush_tlb() __flush_tlb()
+
+/*
+ * flush all atc entries (both kernel and user-space entries).
+ */
+static inline void flush_tlb_all(void)
+{
+	if (CPU_IS_040_OR_060)
+		__asm__ __volatile__(".chip 68040\n\t"
+				     "pflusha\n\t"
+				     ".chip 68k");
+	else if (CPU_IS_020_OR_030)
+		__asm__ __volatile__("pflusha");
+}
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+	if (mm == current->active_mm)
+		__flush_tlb();
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
+{
+	if (vma->vm_mm == current->active_mm) {
+		mm_segment_t old_fs = get_fs();
+		set_fs(USER_DS);
+		__flush_tlb_one(addr);
+		set_fs(old_fs);
+	}
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
+{
+	if (vma->vm_mm == current->active_mm)
+		__flush_tlb();
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	flush_tlb_all();
+}
+
 #else
-#include "tlbflush_mm.h"
+
+
+/* Reserved PMEGs. */
+extern char sun3_reserved_pmeg[SUN3_PMEGS_NUM];
+extern unsigned long pmeg_vaddr[SUN3_PMEGS_NUM];
+extern unsigned char pmeg_alloc[SUN3_PMEGS_NUM];
+extern unsigned char pmeg_ctx[SUN3_PMEGS_NUM];
+
+/* Flush all userspace mappings one by one...  (why no flush command,
+   sun?) */
+static inline void flush_tlb_all(void)
+{
+       unsigned long addr;
+       unsigned char ctx, oldctx;
+
+       oldctx = sun3_get_context();
+       for(addr = 0x00000000; addr < TASK_SIZE; addr += SUN3_PMEG_SIZE) {
+	       for(ctx = 0; ctx < 8; ctx++) {
+		       sun3_put_context(ctx);
+		       sun3_put_segmap(addr, SUN3_INVALID_PMEG);
+	       }
+       }
+
+       sun3_put_context(oldctx);
+       /* erase all of the userspace pmeg maps, we've clobbered them
+	  all anyway */
+       for(addr = 0; addr < SUN3_INVALID_PMEG; addr++) {
+	       if(pmeg_alloc[addr] == 1) {
+		       pmeg_alloc[addr] = 0;
+		       pmeg_ctx[addr] = 0;
+		       pmeg_vaddr[addr] = 0;
+	       }
+       }
+
+}
+
+/* Clear user TLB entries within the context named in mm */
+static inline void flush_tlb_mm (struct mm_struct *mm)
+{
+     unsigned char oldctx;
+     unsigned char seg;
+     unsigned long i;
+
+     oldctx = sun3_get_context();
+     sun3_put_context(mm->context);
+
+     for(i = 0; i < TASK_SIZE; i += SUN3_PMEG_SIZE) {
+	     seg = sun3_get_segmap(i);
+	     if(seg == SUN3_INVALID_PMEG)
+		     continue;
+
+	     sun3_put_segmap(i, SUN3_INVALID_PMEG);
+	     pmeg_alloc[seg] = 0;
+	     pmeg_ctx[seg] = 0;
+	     pmeg_vaddr[seg] = 0;
+     }
+
+     sun3_put_context(oldctx);
+
+}
+
+/* Flush a single TLB page. In this case, we're limited to flushing a
+   single PMEG */
+static inline void flush_tlb_page (struct vm_area_struct *vma,
+				   unsigned long addr)
+{
+	unsigned char oldctx;
+	unsigned char i;
+
+	oldctx = sun3_get_context();
+	sun3_put_context(vma->vm_mm->context);
+	addr &= ~SUN3_PMEG_MASK;
+	if((i = sun3_get_segmap(addr)) != SUN3_INVALID_PMEG)
+	{
+		pmeg_alloc[i] = 0;
+		pmeg_ctx[i] = 0;
+		pmeg_vaddr[i] = 0;
+		sun3_put_segmap (addr,  SUN3_INVALID_PMEG);
+	}
+	sun3_put_context(oldctx);
+
+}
+/* Flush a range of pages from TLB. */
+
+static inline void flush_tlb_range (struct vm_area_struct *vma,
+		      unsigned long start, unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned char seg, oldctx;
+
+	start &= ~SUN3_PMEG_MASK;
+
+	oldctx = sun3_get_context();
+	sun3_put_context(mm->context);
+
+	while(start < end)
+	{
+		if((seg = sun3_get_segmap(start)) == SUN3_INVALID_PMEG)
+		     goto next;
+		if(pmeg_ctx[seg] == mm->context) {
+			pmeg_alloc[seg] = 0;
+			pmeg_ctx[seg] = 0;
+			pmeg_vaddr[seg] = 0;
+		}
+		sun3_put_segmap(start, SUN3_INVALID_PMEG);
+	next:
+		start += SUN3_PMEG_SIZE;
+	}
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	flush_tlb_all();
+}
+
+/* Flush kernel page from TLB. */
+static inline void flush_tlb_kernel_page (unsigned long addr)
+{
+	sun3_put_segmap (addr & ~(SUN3_PMEG_SIZE - 1), SUN3_INVALID_PMEG);
+}
+
 #endif
+
+#else /* !CONFIG_MMU */
+
+/*
+ * flush all user-space atc entries.
+ */
+static inline void __flush_tlb(void)
+{
+	BUG();
+}
+
+static inline void __flush_tlb_one(unsigned long addr)
+{
+	BUG();
+}
+
+#define flush_tlb() __flush_tlb()
+
+/*
+ * flush all atc entries (both kernel and user-space entries).
+ */
+static inline void flush_tlb_all(void)
+{
+	BUG();
+}
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+	BUG();
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
+{
+	BUG();
+}
+
+static inline void flush_tlb_range(struct mm_struct *mm,
+				   unsigned long start, unsigned long end)
+{
+	BUG();
+}
+
+static inline void flush_tlb_kernel_page(unsigned long addr)
+{
+	BUG();
+}
+
+#endif /* CONFIG_MMU */
+
+#endif /* _M68K_TLBFLUSH_H */
diff --git a/arch/m68k/include/asm/tlbflush_mm.h b/arch/m68k/include/asm/tlbflush_mm.h
deleted file mode 100644
index acb6bf2..0000000
--- a/arch/m68k/include/asm/tlbflush_mm.h
+++ /dev/null
@@ -1,219 +0,0 @@
-#ifndef _M68K_TLBFLUSH_H
-#define _M68K_TLBFLUSH_H
-
-
-#ifndef CONFIG_SUN3
-
-#include <asm/current.h>
-
-static inline void flush_tlb_kernel_page(void *addr)
-{
-	if (CPU_IS_040_OR_060) {
-		mm_segment_t old_fs = get_fs();
-		set_fs(KERNEL_DS);
-		__asm__ __volatile__(".chip 68040\n\t"
-				     "pflush (%0)\n\t"
-				     ".chip 68k"
-				     : : "a" (addr));
-		set_fs(old_fs);
-	} else if (CPU_IS_020_OR_030)
-		__asm__ __volatile__("pflush #4,#4,(%0)" : : "a" (addr));
-}
-
-/*
- * flush all user-space atc entries.
- */
-static inline void __flush_tlb(void)
-{
-	if (CPU_IS_040_OR_060)
-		__asm__ __volatile__(".chip 68040\n\t"
-				     "pflushan\n\t"
-				     ".chip 68k");
-	else if (CPU_IS_020_OR_030)
-		__asm__ __volatile__("pflush #0,#4");
-}
-
-static inline void __flush_tlb040_one(unsigned long addr)
-{
-	__asm__ __volatile__(".chip 68040\n\t"
-			     "pflush (%0)\n\t"
-			     ".chip 68k"
-			     : : "a" (addr));
-}
-
-static inline void __flush_tlb_one(unsigned long addr)
-{
-	if (CPU_IS_040_OR_060)
-		__flush_tlb040_one(addr);
-	else if (CPU_IS_020_OR_030)
-		__asm__ __volatile__("pflush #0,#4,(%0)" : : "a" (addr));
-}
-
-#define flush_tlb() __flush_tlb()
-
-/*
- * flush all atc entries (both kernel and user-space entries).
- */
-static inline void flush_tlb_all(void)
-{
-	if (CPU_IS_040_OR_060)
-		__asm__ __volatile__(".chip 68040\n\t"
-				     "pflusha\n\t"
-				     ".chip 68k");
-	else if (CPU_IS_020_OR_030)
-		__asm__ __volatile__("pflusha");
-}
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
-	if (mm == current->active_mm)
-		__flush_tlb();
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
-{
-	if (vma->vm_mm == current->active_mm) {
-		mm_segment_t old_fs = get_fs();
-		set_fs(USER_DS);
-		__flush_tlb_one(addr);
-		set_fs(old_fs);
-	}
-}
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end)
-{
-	if (vma->vm_mm == current->active_mm)
-		__flush_tlb();
-}
-
-static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
-{
-	flush_tlb_all();
-}
-
-#else
-
-
-/* Reserved PMEGs. */
-extern char sun3_reserved_pmeg[SUN3_PMEGS_NUM];
-extern unsigned long pmeg_vaddr[SUN3_PMEGS_NUM];
-extern unsigned char pmeg_alloc[SUN3_PMEGS_NUM];
-extern unsigned char pmeg_ctx[SUN3_PMEGS_NUM];
-
-/* Flush all userspace mappings one by one...  (why no flush command,
-   sun?) */
-static inline void flush_tlb_all(void)
-{
-       unsigned long addr;
-       unsigned char ctx, oldctx;
-
-       oldctx = sun3_get_context();
-       for(addr = 0x00000000; addr < TASK_SIZE; addr += SUN3_PMEG_SIZE) {
-	       for(ctx = 0; ctx < 8; ctx++) {
-		       sun3_put_context(ctx);
-		       sun3_put_segmap(addr, SUN3_INVALID_PMEG);
-	       }
-       }
-
-       sun3_put_context(oldctx);
-       /* erase all of the userspace pmeg maps, we've clobbered them
-	  all anyway */
-       for(addr = 0; addr < SUN3_INVALID_PMEG; addr++) {
-	       if(pmeg_alloc[addr] == 1) {
-		       pmeg_alloc[addr] = 0;
-		       pmeg_ctx[addr] = 0;
-		       pmeg_vaddr[addr] = 0;
-	       }
-       }
-
-}
-
-/* Clear user TLB entries within the context named in mm */
-static inline void flush_tlb_mm (struct mm_struct *mm)
-{
-     unsigned char oldctx;
-     unsigned char seg;
-     unsigned long i;
-
-     oldctx = sun3_get_context();
-     sun3_put_context(mm->context);
-
-     for(i = 0; i < TASK_SIZE; i += SUN3_PMEG_SIZE) {
-	     seg = sun3_get_segmap(i);
-	     if(seg == SUN3_INVALID_PMEG)
-		     continue;
-
-	     sun3_put_segmap(i, SUN3_INVALID_PMEG);
-	     pmeg_alloc[seg] = 0;
-	     pmeg_ctx[seg] = 0;
-	     pmeg_vaddr[seg] = 0;
-     }
-
-     sun3_put_context(oldctx);
-
-}
-
-/* Flush a single TLB page. In this case, we're limited to flushing a
-   single PMEG */
-static inline void flush_tlb_page (struct vm_area_struct *vma,
-				   unsigned long addr)
-{
-	unsigned char oldctx;
-	unsigned char i;
-
-	oldctx = sun3_get_context();
-	sun3_put_context(vma->vm_mm->context);
-	addr &= ~SUN3_PMEG_MASK;
-	if((i = sun3_get_segmap(addr)) != SUN3_INVALID_PMEG)
-	{
-		pmeg_alloc[i] = 0;
-		pmeg_ctx[i] = 0;
-		pmeg_vaddr[i] = 0;
-		sun3_put_segmap (addr,  SUN3_INVALID_PMEG);
-	}
-	sun3_put_context(oldctx);
-
-}
-/* Flush a range of pages from TLB. */
-
-static inline void flush_tlb_range (struct vm_area_struct *vma,
-		      unsigned long start, unsigned long end)
-{
-	struct mm_struct *mm = vma->vm_mm;
-	unsigned char seg, oldctx;
-
-	start &= ~SUN3_PMEG_MASK;
-
-	oldctx = sun3_get_context();
-	sun3_put_context(mm->context);
-
-	while(start < end)
-	{
-		if((seg = sun3_get_segmap(start)) == SUN3_INVALID_PMEG)
-		     goto next;
-		if(pmeg_ctx[seg] == mm->context) {
-			pmeg_alloc[seg] = 0;
-			pmeg_ctx[seg] = 0;
-			pmeg_vaddr[seg] = 0;
-		}
-		sun3_put_segmap(start, SUN3_INVALID_PMEG);
-	next:
-		start += SUN3_PMEG_SIZE;
-	}
-}
-
-static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
-{
-	flush_tlb_all();
-}
-
-/* Flush kernel page from TLB. */
-static inline void flush_tlb_kernel_page (unsigned long addr)
-{
-	sun3_put_segmap (addr & ~(SUN3_PMEG_SIZE - 1), SUN3_INVALID_PMEG);
-}
-
-#endif
-
-#endif /* _M68K_TLBFLUSH_H */
diff --git a/arch/m68k/include/asm/tlbflush_no.h b/arch/m68k/include/asm/tlbflush_no.h
deleted file mode 100644
index a470cfb..0000000
--- a/arch/m68k/include/asm/tlbflush_no.h
+++ /dev/null
@@ -1,55 +0,0 @@
-#ifndef _M68KNOMMU_TLBFLUSH_H
-#define _M68KNOMMU_TLBFLUSH_H
-
-/*
- * Copyright (C) 2000 Lineo, David McCullough <davidm@uclinux.org>
- * Copyright (C) 2000-2002, Greg Ungerer <gerg@snapgear.com>
- */
-
-#include <asm/setup.h>
-
-/*
- * flush all user-space atc entries.
- */
-static inline void __flush_tlb(void)
-{
-	BUG();
-}
-
-static inline void __flush_tlb_one(unsigned long addr)
-{
-	BUG();
-}
-
-#define flush_tlb() __flush_tlb()
-
-/*
- * flush all atc entries (both kernel and user-space entries).
- */
-static inline void flush_tlb_all(void)
-{
-	BUG();
-}
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
-	BUG();
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
-{
-	BUG();
-}
-
-static inline void flush_tlb_range(struct mm_struct *mm,
-				   unsigned long start, unsigned long end)
-{
-	BUG();
-}
-
-static inline void flush_tlb_kernel_page(unsigned long addr)
-{
-	BUG();
-}
-
-#endif /* _M68KNOMMU_TLBFLUSH_H */
diff --git a/arch/m68k/include/asm/ucontext.h b/arch/m68k/include/asm/ucontext.h
index b53cd16..e4e2266 100644
--- a/arch/m68k/include/asm/ucontext.h
+++ b/arch/m68k/include/asm/ucontext.h
@@ -1,5 +1,30 @@
-#ifdef __uClinux__
-#include "ucontext_no.h"
-#else
-#include "ucontext_mm.h"
+#ifndef _M68K_UCONTEXT_H
+#define _M68K_UCONTEXT_H
+
+typedef int greg_t;
+#define NGREG 18
+typedef greg_t gregset_t[NGREG];
+
+typedef struct fpregset {
+	int f_fpcntl[3];
+	int f_fpregs[8*3];
+} fpregset_t;
+
+struct mcontext {
+	int version;
+	gregset_t gregs;
+	fpregset_t fpregs;
+};
+
+#define MCONTEXT_VERSION 2
+
+struct ucontext {
+	unsigned long	  uc_flags;
+	struct ucontext  *uc_link;
+	stack_t		  uc_stack;
+	struct mcontext	  uc_mcontext;
+	unsigned long	  uc_filler[80];
+	sigset_t	  uc_sigmask;	/* mask last for extensibility */
+};
+
 #endif
diff --git a/arch/m68k/include/asm/ucontext_mm.h b/arch/m68k/include/asm/ucontext_mm.h
deleted file mode 100644
index e4e2266..0000000
--- a/arch/m68k/include/asm/ucontext_mm.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef _M68K_UCONTEXT_H
-#define _M68K_UCONTEXT_H
-
-typedef int greg_t;
-#define NGREG 18
-typedef greg_t gregset_t[NGREG];
-
-typedef struct fpregset {
-	int f_fpcntl[3];
-	int f_fpregs[8*3];
-} fpregset_t;
-
-struct mcontext {
-	int version;
-	gregset_t gregs;
-	fpregset_t fpregs;
-};
-
-#define MCONTEXT_VERSION 2
-
-struct ucontext {
-	unsigned long	  uc_flags;
-	struct ucontext  *uc_link;
-	stack_t		  uc_stack;
-	struct mcontext	  uc_mcontext;
-	unsigned long	  uc_filler[80];
-	sigset_t	  uc_sigmask;	/* mask last for extensibility */
-};
-
-#endif
diff --git a/arch/m68k/include/asm/ucontext_no.h b/arch/m68k/include/asm/ucontext_no.h
deleted file mode 100644
index 713a27f..0000000
--- a/arch/m68k/include/asm/ucontext_no.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef _M68KNOMMU_UCONTEXT_H
-#define _M68KNOMMU_UCONTEXT_H
-
-typedef int greg_t;
-#define NGREG 18
-typedef greg_t gregset_t[NGREG];
-
-typedef struct fpregset {
-	int f_pcr;
-	int f_psr;
-	int f_fpiaddr;
-	int f_fpregs[8][3];
-} fpregset_t;
-
-struct mcontext {
-	int version;
-	gregset_t gregs;
-	fpregset_t fpregs;
-};
-
-#define MCONTEXT_VERSION 2
-
-struct ucontext {
-	unsigned long	  uc_flags;
-	struct ucontext  *uc_link;
-	stack_t		  uc_stack;
-	struct mcontext	  uc_mcontext;
-	unsigned long	  uc_filler[80];
-	sigset_t	  uc_sigmask;	/* mask last for extensibility */
-};
-
-#endif
diff --git a/arch/m68k/include/asm/unaligned.h b/arch/m68k/include/asm/unaligned.h
index c640bba..019caa7 100644
--- a/arch/m68k/include/asm/unaligned.h
+++ b/arch/m68k/include/asm/unaligned.h
@@ -1,5 +1,25 @@
-#ifdef __uClinux__
-#include "unaligned_no.h"
+#ifndef _ASM_M68K_UNALIGNED_H
+#define _ASM_M68K_UNALIGNED_H
+
+
+#ifdef CONFIG_COLDFIRE
+#include <linux/unaligned/be_struct.h>
+#include <linux/unaligned/le_byteshift.h>
+#include <linux/unaligned/generic.h>
+
+#define get_unaligned	__get_unaligned_be
+#define put_unaligned	__put_unaligned_be
+
 #else
-#include "unaligned_mm.h"
+/*
+ * The m68k can do unaligned accesses itself. 
+ */
+#include <linux/unaligned/access_ok.h>
+#include <linux/unaligned/generic.h>
+
+#define get_unaligned	__get_unaligned_be
+#define put_unaligned	__put_unaligned_be
+
 #endif
+
+#endif /* _ASM_M68K_UNALIGNED_H */
diff --git a/arch/m68k/include/asm/unaligned_mm.h b/arch/m68k/include/asm/unaligned_mm.h
deleted file mode 100644
index 77698f2..0000000
--- a/arch/m68k/include/asm/unaligned_mm.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef _ASM_M68K_UNALIGNED_H
-#define _ASM_M68K_UNALIGNED_H
-
-/*
- * The m68k can do unaligned accesses itself.
- */
-#include <linux/unaligned/access_ok.h>
-#include <linux/unaligned/generic.h>
-
-#define get_unaligned	__get_unaligned_be
-#define put_unaligned	__put_unaligned_be
-
-#endif /* _ASM_M68K_UNALIGNED_H */
diff --git a/arch/m68k/include/asm/unaligned_no.h b/arch/m68k/include/asm/unaligned_no.h
deleted file mode 100644
index eb1ea4c..0000000
--- a/arch/m68k/include/asm/unaligned_no.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef _ASM_M68KNOMMU_UNALIGNED_H
-#define _ASM_M68KNOMMU_UNALIGNED_H
-
-
-#ifdef CONFIG_COLDFIRE
-#include <linux/unaligned/be_struct.h>
-#include <linux/unaligned/le_byteshift.h>
-#include <linux/unaligned/generic.h>
-
-#define get_unaligned	__get_unaligned_be
-#define put_unaligned	__put_unaligned_be
-
-#else
-/*
- * The m68k can do unaligned accesses itself. 
- */
-#include <linux/unaligned/access_ok.h>
-#include <linux/unaligned/generic.h>
-
-#define get_unaligned	__get_unaligned_be
-#define put_unaligned	__put_unaligned_be
-
-#endif
-
-#endif /* _ASM_M68KNOMMU_UNALIGNED_H */
diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
index 7db4159..54d9807 100644
--- a/arch/m68k/kernel/time.c
+++ b/arch/m68k/kernel/time.c
@@ -18,6 +18,7 @@
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/rtc.h>
+#include <linux/platform_device.h>
 
 #include <asm/machdep.h>
 #include <asm/io.h>
@@ -159,3 +160,20 @@ int do_settimeofday(struct timespec *tv)
 }
 
 EXPORT_SYMBOL(do_settimeofday);
+
+
+static int __init rtc_init(void)
+{
+	struct platform_device *pdev;
+
+	if (!mach_hwclk)
+		return -ENODEV;
+
+	pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	return 0;
+}
+
+module_init(rtc_init);
diff --git a/arch/m68knommu/Makefile b/arch/m68knommu/Makefile
index fd0fb30..ce404bc 100644
--- a/arch/m68knommu/Makefile
+++ b/arch/m68knommu/Makefile
@@ -88,18 +88,18 @@ export PLATFORM BOARD MODEL CPUCLASS
 #
 # Some CFLAG additions based on specific CPU type.
 #
-cflags-$(CONFIG_M5206)		:= -m5200
-cflags-$(CONFIG_M5206e)		:= -m5200
-cflags-$(CONFIG_M520x)		:= -m5307
+cflags-$(CONFIG_M5206)		:= $(call cc-option,-mcpu=5206,-m5200)
+cflags-$(CONFIG_M5206e)		:= $(call cc-option,-m5206e,-m5200)
+cflags-$(CONFIG_M520x)		:= $(call cc-option,-mcpu=5208,-m5200)
 cflags-$(CONFIG_M523x)		:= $(call cc-option,-mcpu=523x,-m5307)
-cflags-$(CONFIG_M5249)		:= -m5200
+cflags-$(CONFIG_M5249)		:= $(call cc-option,-mcpu=5249,-m5200)
 cflags-$(CONFIG_M5271)		:= $(call cc-option,-mcpu=5271,-m5307)
-cflags-$(CONFIG_M5272)		:= -m5307
+cflags-$(CONFIG_M5272)		:= $(call cc-option,-mcpu=5272,-m5200)
 cflags-$(CONFIG_M5275)		:= $(call cc-option,-mcpu=5275,-m5307)
 cflags-$(CONFIG_M528x)		:= $(call cc-option,-m528x,-m5307)
-cflags-$(CONFIG_M5307)		:= -m5307
+cflags-$(CONFIG_M5307)		:= $(call cc-option,-m5307,-m5200)
 cflags-$(CONFIG_M532x)		:= $(call cc-option,-mcpu=532x,-m5307)
-cflags-$(CONFIG_M5407)		:= -m5200
+cflags-$(CONFIG_M5407)		:= $(call cc-option,-m5407,-m5200)
 cflags-$(CONFIG_M68328)		:= -m68000
 cflags-$(CONFIG_M68EZ328)	:= -m68000
 cflags-$(CONFIG_M68VZ328)	:= -m68000
diff --git a/arch/m68knommu/kernel/dma.c b/arch/m68knommu/kernel/dma.c
index e10eafc..9361258 100644
--- a/arch/m68knommu/kernel/dma.c
+++ b/arch/m68knommu/kernel/dma.c
@@ -9,10 +9,11 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/device.h>
+#include <linux/dma-mapping.h>
 #include <asm/io.h>
 
 void *dma_alloc_coherent(struct device *dev, size_t size,
-			   dma_addr_t *dma_handle, int gfp)
+			   dma_addr_t *dma_handle, gfp_t gfp)
 {
 	void *ret;
 	/* ignore region specifiers */
@@ -34,3 +35,8 @@ void dma_free_coherent(struct device *dev, size_t size,
 {
 	free_pages((unsigned long)vaddr, get_order(size));
 }
+
+void dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+}
+
diff --git a/arch/m68knommu/kernel/irq.c b/arch/m68knommu/kernel/irq.c
index bba1bb4..56e0f4c 100644
--- a/arch/m68knommu/kernel/irq.c
+++ b/arch/m68knommu/kernel/irq.c
@@ -23,7 +23,7 @@ asmlinkage void do_IRQ(int irq, struct pt_regs *regs)
 	struct pt_regs *oldregs = set_irq_regs(regs);
 
 	irq_enter();
-	__do_IRQ(irq);
+	generic_handle_irq(irq);
 	irq_exit();
 
 	set_irq_regs(oldregs);
diff --git a/arch/m68knommu/mm/init.c b/arch/m68knommu/mm/init.c
index 3bf249c..7befc0c 100644
--- a/arch/m68knommu/mm/init.c
+++ b/arch/m68knommu/mm/init.c
@@ -111,11 +111,7 @@ void __init paging_init(void)
 	{
 		unsigned long zones_size[MAX_NR_ZONES] = {0, };
 
-		zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT;
-		zones_size[ZONE_NORMAL] = (end_mem - PAGE_OFFSET) >> PAGE_SHIFT;
-#ifdef CONFIG_HIGHMEM
-		zones_size[ZONE_HIGHMEM] = 0;
-#endif
+		zones_size[ZONE_DMA] = (end_mem - PAGE_OFFSET) >> PAGE_SHIFT;
 		free_area_init(zones_size);
 	}
 }
diff --git a/arch/m68knommu/platform/5249/config.c b/arch/m68knommu/platform/5249/config.c
index d299f7b..9eab19d 100644
--- a/arch/m68knommu/platform/5249/config.c
+++ b/arch/m68knommu/platform/5249/config.c
@@ -32,7 +32,8 @@ static struct mcf_platform_uart m5249_uart_platform[] = {
 	{
 		.mapbase 	= MCF_MBAR + MCFUART_BASE2,
 		.irq		= 74,
-	}
+	},
+	{ },
 };
 
 static struct platform_device m5249_uart = {
@@ -50,12 +51,12 @@ static struct platform_device *m5249_devices[] __initdata = {
 static void __init m5249_uart_init_line(int line, int irq)
 {
 	if (line == 0) {
-		writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR);
-		writeb(irq, MCFUART_BASE1 + MCFUART_UIVR);
+		writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR);
+		writeb(irq, MCF_MBAR + MCFUART_BASE1 + MCFUART_UIVR);
 		mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART1);
 	} else if (line == 1) {
-		writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR);
-		writeb(irq, MCFUART_BASE2 + MCFUART_UIVR);
+		writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR);
+		writeb(irq, MCF_MBAR + MCFUART_BASE2 + MCFUART_UIVR);
 		mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART2);
 	}
 }
diff --git a/arch/m68knommu/platform/5307/config.c b/arch/m68knommu/platform/5307/config.c
index 724faf0..44803bf 100644
--- a/arch/m68knommu/platform/5307/config.c
+++ b/arch/m68knommu/platform/5307/config.c
@@ -65,12 +65,12 @@ static struct platform_device *m5307_devices[] __initdata = {
 static void __init m5307_uart_init_line(int line, int irq)
 {
 	if (line == 0) {
-		writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR);
-		writeb(irq, MCFUART_BASE1 + MCFUART_UIVR);
+		writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR);
+		writeb(irq, MCF_MBAR + MCFUART_BASE1 + MCFUART_UIVR);
 		mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART1);
 	} else if (line == 1) {
-		writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR);
-		writeb(irq, MCFUART_BASE2 + MCFUART_UIVR);
+		writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR);
+		writeb(irq, MCF_MBAR + MCFUART_BASE2 + MCFUART_UIVR);
 		mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART2);
 	}
 }
diff --git a/arch/m68knommu/platform/5407/config.c b/arch/m68knommu/platform/5407/config.c
index 648b8b7..0ee8c1a 100644
--- a/arch/m68knommu/platform/5407/config.c
+++ b/arch/m68knommu/platform/5407/config.c
@@ -56,12 +56,12 @@ static struct platform_device *m5407_devices[] __initdata = {
 static void __init m5407_uart_init_line(int line, int irq)
 {
 	if (line == 0) {
-		writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR);
-		writeb(irq, MCFUART_BASE1 + MCFUART_UIVR);
+		writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR);
+		writeb(irq, MCF_MBAR + MCFUART_BASE1 + MCFUART_UIVR);
 		mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART1);
 	} else if (line == 1) {
-		writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR);
-		writeb(irq, MCFUART_BASE2 + MCFUART_UIVR);
+		writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR);
+		writeb(irq, MCF_MBAR + MCFUART_BASE2 + MCFUART_UIVR);
 		mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART2);
 	}
 }
diff --git a/arch/m68knommu/platform/coldfire/Makefile b/arch/m68knommu/platform/coldfire/Makefile
index 4f416a9..1bcb937 100644
--- a/arch/m68knommu/platform/coldfire/Makefile
+++ b/arch/m68knommu/platform/coldfire/Makefile
@@ -14,7 +14,7 @@
 
 asflags-$(CONFIG_FULLDEBUG) := -DDEBUGGER_COMPATIBLE_CACHE=1
 
-obj-$(CONFIG_COLDFIRE)	+= dma.o entry.o vectors.o
+obj-$(CONFIG_COLDFIRE)	+= clk.o dma.o entry.o vectors.o
 obj-$(CONFIG_M5206)	+= timers.o
 obj-$(CONFIG_M5206e)	+= timers.o
 obj-$(CONFIG_M520x)	+= pit.o
diff --git a/arch/m68knommu/platform/coldfire/clk.c b/arch/m68knommu/platform/coldfire/clk.c
new file mode 100644
index 0000000..7cdbf44
--- /dev/null
+++ b/arch/m68knommu/platform/coldfire/clk.c
@@ -0,0 +1,40 @@
+/***************************************************************************/
+
+/*
+ *	clk.c -- general ColdFire CPU kernel clk handling
+ *
+ *	Copyright (C) 2009, Greg Ungerer (gerg@snapgear.com)
+ */
+
+/***************************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <asm/coldfire.h>
+
+/***************************************************************************/
+
+struct clk *clk_get(struct device *dev, const char *id)
+{
+	return NULL;
+}
+
+int clk_enable(struct clk *clk)
+{
+	return 0;
+}
+
+void clk_disable(struct clk *clk)
+{
+}
+
+void clk_put(struct clk *clk)
+{
+}
+
+unsigned long clk_get_rate(struct clk *clk)
+{
+	return MCF_CLK;
+}
+
+/***************************************************************************/
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index aacf11d..9038f39 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -9,9 +9,13 @@ config PARISC
 	def_bool y
 	select HAVE_IDE
 	select HAVE_OPROFILE
+	select HAVE_FUNCTION_TRACER if 64BIT
+	select HAVE_FUNCTION_GRAPH_TRACER if 64BIT
+	select HAVE_FUNCTION_TRACE_MCOUNT_TEST if 64BIT
 	select RTC_CLASS
-	select RTC_DRV_PARISC
+	select RTC_DRV_GENERIC
 	select INIT_ALL_POSSIBLE
+	select BUG
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
 	  in many of their workstations & servers (HP9000 700 and 800 series,
@@ -75,6 +79,9 @@ config GENERIC_HARDIRQS
 config GENERIC_IRQ_PROBE
 	def_bool y
 
+config HAVE_LATENCYTOP_SUPPORT
+        def_bool y
+
 config IRQ_PER_CPU
 	bool
 	default y
@@ -83,6 +90,9 @@ config IRQ_PER_CPU
 config PM
 	bool
 
+config STACKTRACE_SUPPORT
+	def_bool y
+
 config ISA_DMA_API
 	bool
 
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 0d42827..da6f669 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -56,7 +56,9 @@ cflags-y	+= -mdisable-fpregs
 
 # Without this, "ld -r" results in .text sections that are too big
 # (> 0x40000) for branches to reach stubs.
-cflags-y	+= -ffunction-sections
+ifndef CONFIG_FUNCTION_TRACER
+  cflags-y	+= -ffunction-sections
+endif
 
 # select which processor to optimise for
 cflags-$(CONFIG_PA7100)		+= -march=1.1 -mschedule=7100
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index edbfe25..ada3e53 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -25,7 +25,7 @@
  * Since "a" is usually an address, use one spinlock per cacheline.
  */
 #  define ATOMIC_HASH_SIZE 4
-#  define ATOMIC_HASH(a) (&(__atomic_hash[ (((unsigned long) a)/L1_CACHE_BYTES) & (ATOMIC_HASH_SIZE-1) ]))
+#  define ATOMIC_HASH(a) (&(__atomic_hash[ (((unsigned long) (a))/L1_CACHE_BYTES) & (ATOMIC_HASH_SIZE-1) ]))
 
 extern raw_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
 
@@ -222,13 +222,13 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
 
 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
 
-#define atomic_add(i,v)	((void)(__atomic_add_return( ((int)i),(v))))
-#define atomic_sub(i,v)	((void)(__atomic_add_return(-((int)i),(v))))
+#define atomic_add(i,v)	((void)(__atomic_add_return( ((int)(i)),(v))))
+#define atomic_sub(i,v)	((void)(__atomic_add_return(-((int)(i)),(v))))
 #define atomic_inc(v)	((void)(__atomic_add_return(   1,(v))))
 #define atomic_dec(v)	((void)(__atomic_add_return(  -1,(v))))
 
-#define atomic_add_return(i,v)	(__atomic_add_return( ((int)i),(v)))
-#define atomic_sub_return(i,v)	(__atomic_add_return(-((int)i),(v)))
+#define atomic_add_return(i,v)	(__atomic_add_return( ((int)(i)),(v)))
+#define atomic_sub_return(i,v)	(__atomic_add_return(-((int)(i)),(v)))
 #define atomic_inc_return(v)	(__atomic_add_return(   1,(v)))
 #define atomic_dec_return(v)	(__atomic_add_return(  -1,(v)))
 
@@ -289,13 +289,13 @@ atomic64_read(const atomic64_t *v)
 	return v->counter;
 }
 
-#define atomic64_add(i,v)	((void)(__atomic64_add_return( ((s64)i),(v))))
-#define atomic64_sub(i,v)	((void)(__atomic64_add_return(-((s64)i),(v))))
+#define atomic64_add(i,v)	((void)(__atomic64_add_return( ((s64)(i)),(v))))
+#define atomic64_sub(i,v)	((void)(__atomic64_add_return(-((s64)(i)),(v))))
 #define atomic64_inc(v)		((void)(__atomic64_add_return(   1,(v))))
 #define atomic64_dec(v)		((void)(__atomic64_add_return(  -1,(v))))
 
-#define atomic64_add_return(i,v)	(__atomic64_add_return( ((s64)i),(v)))
-#define atomic64_sub_return(i,v)	(__atomic64_add_return(-((s64)i),(v)))
+#define atomic64_add_return(i,v)	(__atomic64_add_return( ((s64)(i)),(v)))
+#define atomic64_sub_return(i,v)	(__atomic64_add_return(-((s64)(i)),(v)))
 #define atomic64_inc_return(v)		(__atomic64_add_return(   1,(v)))
 #define atomic64_dec_return(v)		(__atomic64_add_return(  -1,(v)))
 
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index b7ca6dc..7243951 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -97,6 +97,9 @@ void mark_rodata_ro(void);
 
 #ifdef CONFIG_PA8X00
 /* Only pa8800, pa8900 needs this */
+
+#include <asm/kmap_types.h>
+
 #define ARCH_HAS_KMAP
 
 void kunmap_parisc(void *addr);
diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h
index 7fa6757..9c802eb 100644
--- a/arch/parisc/include/asm/elf.h
+++ b/arch/parisc/include/asm/elf.h
@@ -168,6 +168,16 @@ typedef struct elf64_fdesc {
 	__u64	gp;
 } Elf64_Fdesc;
 
+#ifdef __KERNEL__
+
+#ifdef CONFIG_64BIT
+#define Elf_Fdesc	Elf64_Fdesc
+#else
+#define Elf_Fdesc	Elf32_Fdesc
+#endif /*CONFIG_64BIT*/
+
+#endif /*__KERNEL__*/
+
 /* Legal values for p_type field of Elf32_Phdr/Elf64_Phdr.  */
 
 #define PT_HP_TLS		(PT_LOOS + 0x0)
diff --git a/arch/parisc/include/asm/ftrace.h b/arch/parisc/include/asm/ftrace.h
new file mode 100644
index 0000000..2fa05dd
--- /dev/null
+++ b/arch/parisc/include/asm/ftrace.h
@@ -0,0 +1,25 @@
+#ifndef _ASM_PARISC_FTRACE_H
+#define _ASM_PARISC_FTRACE_H
+
+#ifndef __ASSEMBLY__
+extern void mcount(void);
+
+/*
+ * Stack of return addresses for functions of a thread.
+ * Used in struct thread_info
+ */
+struct ftrace_ret_stack {
+	unsigned long ret;
+	unsigned long func;
+	unsigned long long calltime;
+};
+
+/*
+ * Primary handler of a function return.
+ * It relays on ftrace_return_to_handler.
+ * Defined in entry.S
+ */
+extern void return_to_handler(void);
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_PARISC_FTRACE_H */
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index c3941f0..7bc5125 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -36,16 +36,7 @@ void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
  */
 #define STRICT_MM_TYPECHECKS
 #ifdef STRICT_MM_TYPECHECKS
-typedef struct { unsigned long pte;
-#if !defined(CONFIG_64BIT)
-                 unsigned long future_flags;
- /* XXX: it's possible to remove future_flags and change BITS_PER_PTE_ENTRY
-	 to 2, but then strangely the identical 32bit kernel boots on a
-	 c3000(pa20), but not any longer on a 715(pa11).
-	 Still investigating... HelgeD.
-  */
-#endif
-} pte_t; /* either 32 or 64bit */
+typedef struct { unsigned long pte; } pte_t; /* either 32 or 64bit */
 
 /* NOTE: even on 64 bits, these entries are __u32 because we allocate
  * the pmd and pgd in ZONE_DMA (i.e. under 4GB) */
@@ -111,7 +102,7 @@ extern int npmem_ranges;
 #define BITS_PER_PMD_ENTRY	2
 #define BITS_PER_PGD_ENTRY	2
 #else
-#define BITS_PER_PTE_ENTRY	3
+#define BITS_PER_PTE_ENTRY	2
 #define BITS_PER_PMD_ENTRY	2
 #define BITS_PER_PGD_ENTRY	BITS_PER_PMD_ENTRY
 #endif
diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h
index 430f1ae..4ca510b 100644
--- a/arch/parisc/include/asm/pdc.h
+++ b/arch/parisc/include/asm/pdc.h
@@ -49,6 +49,8 @@
 #define PDC_MODEL_CPU_ID	6	/* returns cpu-id (only newer machines!) */
 #define PDC_MODEL_CAPABILITIES	7	/* returns OS32/OS64-flags	*/
 /* Values for PDC_MODEL_CAPABILITIES non-equivalent virtual aliasing support */
+#define  PDC_MODEL_OS64			(1 << 0)
+#define  PDC_MODEL_OS32			(1 << 1)
 #define  PDC_MODEL_IOPDIR_FDC		(1 << 2)
 #define  PDC_MODEL_NVA_MASK		(3 << 4)
 #define  PDC_MODEL_NVA_SUPPORTED	(0 << 4)
@@ -341,6 +343,8 @@
 
 #ifdef __KERNEL__
 
+#include <asm/page.h> /* for __PAGE_OFFSET */
+
 extern int pdc_type;
 
 /* Values for pdc_type */
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 470a4b8..a27d2e2 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -50,11 +50,7 @@
 	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, (unsigned long)pgd_val(e))
 
 /* This is the size of the initially mapped kernel memory */
-#ifdef CONFIG_64BIT
 #define KERNEL_INITIAL_ORDER	24	/* 0 to 1<<24 = 16MB */
-#else
-#define KERNEL_INITIAL_ORDER	23	/* 0 to 1<<23 = 8MB */
-#endif
 #define KERNEL_INITIAL_SIZE	(1 << KERNEL_INITIAL_ORDER)
 
 #if defined(CONFIG_64BIT) && defined(CONFIG_PARISC_PAGE_SIZE_4KB)
@@ -91,16 +87,25 @@
 
 /* Definitions for 1st level */
 #define PGDIR_SHIFT	(PMD_SHIFT + BITS_PER_PMD)
+#if (PGDIR_SHIFT + PAGE_SHIFT + PGD_ORDER - BITS_PER_PGD_ENTRY) > BITS_PER_LONG
+#define BITS_PER_PGD	(BITS_PER_LONG - PGDIR_SHIFT)
+#else
 #define BITS_PER_PGD	(PAGE_SHIFT + PGD_ORDER - BITS_PER_PGD_ENTRY)
+#endif
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 #define PTRS_PER_PGD    (1UL << BITS_PER_PGD)
 #define USER_PTRS_PER_PGD       PTRS_PER_PGD
 
+#ifdef CONFIG_64BIT
 #define MAX_ADDRBITS	(PGDIR_SHIFT + BITS_PER_PGD)
 #define MAX_ADDRESS	(1UL << MAX_ADDRBITS)
-
 #define SPACEID_SHIFT	(MAX_ADDRBITS - 32)
+#else
+#define MAX_ADDRBITS	(BITS_PER_LONG)
+#define MAX_ADDRESS	(1UL << MAX_ADDRBITS)
+#define SPACEID_SHIFT	0
+#endif
 
 /* This calculates the number of initial pages we need for the initial
  * page tables */
diff --git a/arch/parisc/include/asm/smp.h b/arch/parisc/include/asm/smp.h
index 6ef4b78..21eb45a 100644
--- a/arch/parisc/include/asm/smp.h
+++ b/arch/parisc/include/asm/smp.h
@@ -29,7 +29,8 @@ extern void smp_send_reschedule(int cpu);
 extern void smp_send_all_nop(void);
 
 extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 #endif /* !ASSEMBLY */
 
diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile
index 016d3fc..67db072 100644
--- a/arch/parisc/kernel/Makefile
+++ b/arch/parisc/kernel/Makefile
@@ -11,10 +11,25 @@ obj-y	     	:= cache.o pacache.o setup.o traps.o time.o irq.o \
 		   process.o processor.o pdc_cons.o pdc_chassis.o unwind.o \
 		   topology.o
 
+ifdef CONFIG_FUNCTION_TRACER
+# Do not profile debug and lowlevel utilities
+CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_cache.o = -pg
+CFLAGS_REMOVE_irq.o = -pg
+CFLAGS_REMOVE_pacache.o = -pg
+CFLAGS_REMOVE_perf.o = -pg
+CFLAGS_REMOVE_traps.o = -pg
+CFLAGS_REMOVE_unaligned.o = -pg
+CFLAGS_REMOVE_unwind.o = -pg
+endif
+
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_PA11)	+= pci-dma.o
 obj-$(CONFIG_PCI)	+= pci.o
 obj-$(CONFIG_MODULES)	+= module.o
 obj-$(CONFIG_64BIT)	+= binfmt_elf32.o sys_parisc32.o signal32.o
+obj-$(CONFIG_STACKTRACE)+= stacktrace.o
 # only supported for PCX-W/U in 64-bit mode at the moment
 obj-$(CONFIG_64BIT)	+= perf.o perf_asm.o
+obj-$(CONFIG_FUNCTION_TRACER)		+= ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 0db9fdc..ae3e70cd 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -505,6 +505,18 @@
 	STREG		\pte,0(\ptep)
 	.endm
 
+	/* bitshift difference between a PFN (based on kernel's PAGE_SIZE)
+	 * and a CPU TLB 4k PFN (4k => 12 bits to shift) */
+	#define PAGE_ADD_SHIFT  (PAGE_SHIFT-12)
+
+	/* Drop prot bits and convert to page addr for iitlbt and idtlbt */
+	.macro		convert_for_tlb_insert20 pte
+	extrd,u		\pte,(63-ASM_PFN_PTE_SHIFT)+(63-58)+PAGE_ADD_SHIFT,\
+				64-PAGE_SHIFT-PAGE_ADD_SHIFT,\pte
+	depdi		_PAGE_SIZE_ENCODING_DEFAULT,63,\
+				(63-58)+PAGE_ADD_SHIFT,\pte
+	.endm
+
 	/* Convert the pte and prot to tlb insertion values.  How
 	 * this happens is quite subtle, read below */
 	.macro		make_insert_tlb	spc,pte,prot
@@ -544,8 +556,7 @@
 	depi		1,12,1,\prot
 
 	/* Drop prot bits and convert to page addr for iitlbt and idtlbt */
-	extrd,u		\pte,(63-ASM_PFN_PTE_SHIFT)+(63-58),64-PAGE_SHIFT,\pte
-	depdi		_PAGE_SIZE_ENCODING_DEFAULT,63,63-58,\pte
+	convert_for_tlb_insert20 \pte
 	.endm
 
 	/* Identical macro to make_insert_tlb above, except it
@@ -563,8 +574,8 @@
 
 	/* Get rid of prot bits and convert to page addr for iitlba */
 
-	depi		_PAGE_SIZE_ENCODING_DEFAULT,31,ASM_PFN_PTE_SHIFT,\pte
-	extru		\pte,24,25,\pte
+	depi		0,31,ASM_PFN_PTE_SHIFT,\pte
+	SHRREG		\pte,(ASM_PFN_PTE_SHIFT-(31-26)),\pte
 	.endm
 
 	/* This is for ILP32 PA2.0 only.  The TLB insertion needs
@@ -1244,10 +1255,9 @@ nadtlb_check_flush_20w:
 	depdi,z         7,7,3,prot
 	depdi           1,10,1,prot
 
-	/* Get rid of prot bits and convert to page addr for idtlbt */
+	/* Drop prot bits from pte and convert to page addr for idtlbt */
+	convert_for_tlb_insert20 pte
 
-	depdi		0,63,12,pte
-	extrd,u         pte,56,52,pte
 	idtlbt          pte,prot
 
 	rfir
@@ -1337,8 +1347,8 @@ nadtlb_check_flush_11:
 
 	/* Get rid of prot bits and convert to page addr for idtlba */
 
-	depi		0,31,12,pte
-	extru		pte,24,25,pte
+	depi		0,31,ASM_PFN_PTE_SHIFT,pte
+	SHRREG		pte,(ASM_PFN_PTE_SHIFT-(31-26)),pte
 
 	mfsp		%sr1,t0  /* Save sr1 so we can use it in tlb inserts */
 	mtsp		spc,%sr1
@@ -1403,10 +1413,9 @@ nadtlb_check_flush_20:
 	depdi,z         7,7,3,prot
 	depdi           1,10,1,prot
 
-	/* Get rid of prot bits and convert to page addr for idtlbt */
+	/* Drop prot bits from pte and convert to page addr for idtlbt */
+	convert_for_tlb_insert20 pte
 
-	depdi		0,63,12,pte
-	extrd,u         pte,56,32,pte
 	idtlbt          pte,prot
 
 	rfir
@@ -2176,6 +2185,33 @@ syscall_do_resched:
 ENDPROC(syscall_exit)
 
 
+#ifdef CONFIG_FUNCTION_TRACER
+	.import ftrace_function_trampoline,code
+ENTRY(_mcount)
+	copy	%r3, %arg2
+	b	ftrace_function_trampoline
+	nop
+ENDPROC(_mcount)
+
+ENTRY(return_to_handler)
+	load32	return_trampoline, %rp
+	copy	%ret0, %arg0
+	copy	%ret1, %arg1
+	b	ftrace_return_to_handler
+	nop
+return_trampoline:
+	copy	%ret0, %rp
+	copy	%r23, %ret0
+	copy	%r24, %ret1
+
+.globl ftrace_stub
+ftrace_stub:
+	bv	%r0(%rp)
+	nop
+ENDPROC(return_to_handler)
+#endif	/* CONFIG_FUNCTION_TRACER */
+
+
 get_register:
 	/*
 	 * get_register is used by the non access tlb miss handlers to
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index f6d2412..4c247e0 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -527,7 +527,11 @@ int pdc_model_capabilities(unsigned long *capabilities)
         pdc_result[0] = 0; /* preset zero (call may not be implemented!) */
         retval = mem_pdc_call(PDC_MODEL, PDC_MODEL_CAPABILITIES, __pa(pdc_result), 0);
         convert_to_wide(pdc_result);
-        *capabilities = pdc_result[0];
+        if (retval == PDC_OK) {
+                *capabilities = pdc_result[0];
+        } else {
+                *capabilities = PDC_MODEL_OS32;
+        }
         spin_unlock_irqrestore(&pdc_lock, flags);
 
         return retval;
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
new file mode 100644
index 0000000..9877372
--- /dev/null
+++ b/arch/parisc/kernel/ftrace.c
@@ -0,0 +1,185 @@
+/*
+ * Code for tracing calls in Linux kernel.
+ * Copyright (C) 2009 Helge Deller <deller@gmx.de>
+ *
+ * based on code for x86 which is:
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * future possible enhancements:
+ * 	- add CONFIG_DYNAMIC_FTRACE
+ *	- add CONFIG_STACK_TRACER
+ */
+
+#include <linux/init.h>
+#include <linux/ftrace.h>
+
+#include <asm/sections.h>
+#include <asm/ftrace.h>
+
+
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+/* Add a function return address to the trace stack on thread info.*/
+static int push_return_trace(unsigned long ret, unsigned long long time,
+				unsigned long func, int *depth)
+{
+	int index;
+
+	if (!current->ret_stack)
+		return -EBUSY;
+
+	/* The return trace stack is full */
+	if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
+		atomic_inc(&current->trace_overrun);
+		return -EBUSY;
+	}
+
+	index = ++current->curr_ret_stack;
+	barrier();
+	current->ret_stack[index].ret = ret;
+	current->ret_stack[index].func = func;
+	current->ret_stack[index].calltime = time;
+	*depth = index;
+
+	return 0;
+}
+
+/* Retrieve a function return address from the trace stack on thread info.*/
+static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
+{
+	int index;
+
+	index = current->curr_ret_stack;
+
+	if (unlikely(index < 0)) {
+		ftrace_graph_stop();
+		WARN_ON(1);
+		/* Might as well panic, otherwise we have nowhere to go */
+		*ret = (unsigned long)
+			dereference_function_descriptor(&panic);
+		return;
+	}
+
+	*ret = current->ret_stack[index].ret;
+	trace->func = current->ret_stack[index].func;
+	trace->calltime = current->ret_stack[index].calltime;
+	trace->overrun = atomic_read(&current->trace_overrun);
+	trace->depth = index;
+	barrier();
+	current->curr_ret_stack--;
+
+}
+
+/*
+ * Send the trace to the ring-buffer.
+ * @return the original return address.
+ */
+unsigned long ftrace_return_to_handler(unsigned long retval0,
+				       unsigned long retval1)
+{
+	struct ftrace_graph_ret trace;
+	unsigned long ret;
+
+	pop_return_trace(&trace, &ret);
+	trace.rettime = cpu_clock(raw_smp_processor_id());
+	ftrace_graph_return(&trace);
+
+	if (unlikely(!ret)) {
+		ftrace_graph_stop();
+		WARN_ON(1);
+		/* Might as well panic. What else to do? */
+		ret = (unsigned long)
+			dereference_function_descriptor(&panic);
+	}
+
+	/* HACK: we hand over the old function's return values
+	   in %r23 and %r24. Assembly in entry.S will take care
+	   and move those to their final registers %ret0 and %ret1 */
+	asm( "copy %0, %%r23 \n\t"
+	     "copy %1, %%r24 \n" : : "r" (retval0), "r" (retval1) );
+
+	return ret;
+}
+
+/*
+ * Hook the return address and push it in the stack of return addrs
+ * in current thread info.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+{
+	unsigned long old;
+	unsigned long long calltime;
+	struct ftrace_graph_ent trace;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
+
+	old = *parent;
+	*parent = (unsigned long)
+		  dereference_function_descriptor(&return_to_handler);
+
+	if (unlikely(!__kernel_text_address(old))) {
+		ftrace_graph_stop();
+		*parent = old;
+		WARN_ON(1);
+		return;
+	}
+
+	calltime = cpu_clock(raw_smp_processor_id());
+
+	if (push_return_trace(old, calltime,
+				self_addr, &trace.depth) == -EBUSY) {
+		*parent = old;
+		return;
+	}
+
+	trace.func = self_addr;
+
+	/* Only trace if the calling function expects to */
+	if (!ftrace_graph_entry(&trace)) {
+		current->curr_ret_stack--;
+		*parent = old;
+	}
+}
+
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+
+void ftrace_function_trampoline(unsigned long parent,
+				unsigned long self_addr,
+				unsigned long org_sp_gr3)
+{
+	extern ftrace_func_t ftrace_trace_function;
+
+	if (function_trace_stop)
+		return;
+
+	if (ftrace_trace_function != ftrace_stub) {
+		ftrace_trace_function(parent, self_addr);
+		return;
+	}
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	if (ftrace_graph_entry && ftrace_graph_return) {
+		unsigned long sp;
+		unsigned long *parent_rp;
+
+                asm volatile ("copy %%r30, %0" : "=r"(sp));
+		/* sanity check: is stack pointer which we got from
+		   assembler function in entry.S in a reasonable
+		   range compared to current stack pointer? */
+		if ((sp - org_sp_gr3) > 0x400)
+			return;
+
+		/* calculate pointer to %rp in stack */
+		parent_rp = (unsigned long *) org_sp_gr3 - 0x10;
+		/* sanity check: parent_rp should hold parent */
+		if (*parent_rp != parent)
+			return;
+		
+		prepare_ftrace_return(parent_rp, self_addr);
+		return;
+	}
+#endif
+}
+
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index 1c740f5..4ea4229 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -311,12 +311,12 @@ unsigned long txn_alloc_addr(unsigned int virt_irq)
 	next_cpu++; /* assign to "next" CPU we want this bugger on */
 
 	/* validate entry */
-	while ((next_cpu < NR_CPUS) &&
+	while ((next_cpu < nr_cpu_ids) &&
 		(!per_cpu(cpu_data, next_cpu).txn_addr ||
 		 !cpu_online(next_cpu)))
 		next_cpu++;
 
-	if (next_cpu >= NR_CPUS) 
+	if (next_cpu >= nr_cpu_ids) 
 		next_cpu = 0;	/* nothing else, assign monarch */
 
 	return txn_affinity_addr(virt_irq, next_cpu);
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index 9013243..ecd1c50 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -61,9 +61,7 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/bug.h>
-#include <linux/uaccess.h>
 
-#include <asm/sections.h>
 #include <asm/unwind.h>
 
 #if 0
@@ -115,8 +113,6 @@ struct got_entry {
 	Elf32_Addr addr;
 };
 
-#define Elf_Fdesc	Elf32_Fdesc
-
 struct stub_entry {
 	Elf32_Word insns[2]; /* each stub entry has two insns */
 };
@@ -125,8 +121,6 @@ struct got_entry {
 	Elf64_Addr addr;
 };
 
-#define Elf_Fdesc	Elf64_Fdesc
-
 struct stub_entry {
 	Elf64_Word insns[4]; /* each stub entry has four insns */
 };
@@ -916,15 +910,3 @@ void module_arch_cleanup(struct module *mod)
 	deregister_unwind_table(mod);
 	module_bug_cleanup(mod);
 }
-
-#ifdef CONFIG_64BIT
-void *dereference_function_descriptor(void *ptr)
-{
-	Elf64_Fdesc *desc = ptr;
-	void *p;
-
-	if (!probe_kernel_address(&desc->addr, p))
-		ptr = p;
-	return ptr;
-}
-#endif
diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c
index 0eecfbb..df65366 100644
--- a/arch/parisc/kernel/parisc_ksyms.c
+++ b/arch/parisc/kernel/parisc_ksyms.c
@@ -153,5 +153,10 @@ EXPORT_SYMBOL(node_data);
 EXPORT_SYMBOL(pfnnid_map);
 #endif
 
+#ifdef CONFIG_FUNCTION_TRACER
+extern void _mcount(void);
+EXPORT_SYMBOL(_mcount);
+#endif
+
 /* from pacache.S -- needed for copy_page */
 EXPORT_SYMBOL(copy_user_page_asm);
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 8aa591e..6f69101 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -46,14 +46,15 @@
 #include <linux/stddef.h>
 #include <linux/unistd.h>
 #include <linux/kallsyms.h>
+#include <linux/uaccess.h>
 
 #include <asm/io.h>
 #include <asm/asm-offsets.h>
 #include <asm/pdc.h>
 #include <asm/pdc_chassis.h>
 #include <asm/pgalloc.h>
-#include <asm/uaccess.h>
 #include <asm/unwind.h>
+#include <asm/sections.h>
 
 /*
  * The idle thread. There's no useful work to be
@@ -231,8 +232,8 @@ sys_clone(unsigned long clone_flags, unsigned long usp,
 	   
 	   However, these last 3 args are only examined
 	   if the proper flags are set. */
-	int __user *child_tidptr;
-	int __user *parent_tidptr;
+	int __user *parent_tidptr = (int __user *)regs->gr[24];
+	int __user *child_tidptr  = (int __user *)regs->gr[22];
 
 	/* usp must be word aligned.  This also prevents users from
 	 * passing in the value 1 (which is the signal for a special
@@ -243,16 +244,6 @@ sys_clone(unsigned long clone_flags, unsigned long usp,
 	if (usp == 0)
 	  usp = regs->gr[30];
 
-	if (clone_flags & CLONE_PARENT_SETTID)
-	  parent_tidptr = (int __user *)regs->gr[24];
-	else
-	  parent_tidptr = NULL;
-	
-	if (clone_flags & (CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID))
-	  child_tidptr = (int __user *)regs->gr[22];
-	else
-	  child_tidptr = NULL;
-
 	return do_fork(clone_flags, usp, regs, 0, parent_tidptr, child_tidptr);
 }
 
@@ -400,3 +391,15 @@ get_wchan(struct task_struct *p)
 	} while (count++ < 16);
 	return 0;
 }
+
+#ifdef CONFIG_64BIT
+void *dereference_function_descriptor(void *ptr)
+{
+	Elf64_Fdesc *desc = ptr;
+	void *p;
+
+	if (!probe_kernel_address(&desc->addr, p))
+		ptr = p;
+	return ptr;
+}
+#endif
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index ecb6093..e09d0f7 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -100,8 +100,8 @@ static int __cpuinit processor_probe(struct parisc_device *dev)
 	struct cpuinfo_parisc *p;
 
 #ifdef CONFIG_SMP
-	if (num_online_cpus() >= NR_CPUS) {
-		printk(KERN_INFO "num_online_cpus() >= NR_CPUS\n");
+	if (num_online_cpus() >= nr_cpu_ids) {
+		printk(KERN_INFO "num_online_cpus() >= nr_cpu_ids\n");
 		return 1;
 	}
 #else
@@ -214,7 +214,7 @@ static int __cpuinit processor_probe(struct parisc_device *dev)
 	 */
 #ifdef CONFIG_SMP
 	if (cpuid) {
-		cpu_set(cpuid, cpu_present_map);
+		set_cpu_present(cpuid, true);
 		cpu_up(cpuid);
 	}
 #endif
@@ -364,6 +364,13 @@ show_cpuinfo (struct seq_file *m, void *v)
 				 boot_cpu_data.cpu_hz / 1000000,
 				 boot_cpu_data.cpu_hz % 1000000  );
 
+		seq_printf(m, "capabilities\t:");
+		if (boot_cpu_data.pdc.capabilities & PDC_MODEL_OS32)
+			seq_printf(m, " os32");
+		if (boot_cpu_data.pdc.capabilities & PDC_MODEL_OS64)
+			seq_printf(m, " os64");
+		seq_printf(m, "\n");
+
 		seq_printf(m, "model\t\t: %s\n"
 				"model name\t: %s\n",
 				 boot_cpu_data.pdc.sys_model_name,
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 9995d7e..1fd0f0c 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -31,6 +31,7 @@
 #include <linux/err.h>
 #include <linux/delay.h>
 #include <linux/bitops.h>
+#include <linux/ftrace.h>
 
 #include <asm/system.h>
 #include <asm/atomic.h>
@@ -113,14 +114,14 @@ halt_processor(void)
 {
 	/* REVISIT : redirect I/O Interrupts to another CPU? */
 	/* REVISIT : does PM *know* this CPU isn't available? */
-	cpu_clear(smp_processor_id(), cpu_online_map);
+	set_cpu_online(smp_processor_id(), false);
 	local_irq_disable();
 	for (;;)
 		;
 }
 
 
-irqreturn_t
+irqreturn_t __irq_entry
 ipi_interrupt(int irq, void *dev_id) 
 {
 	int this_cpu = smp_processor_id();
@@ -214,11 +215,11 @@ ipi_send(int cpu, enum ipi_message_type op)
 }
 
 static void
-send_IPI_mask(cpumask_t mask, enum ipi_message_type op)
+send_IPI_mask(const struct cpumask *mask, enum ipi_message_type op)
 {
 	int cpu;
 
-	for_each_cpu_mask(cpu, mask)
+	for_each_cpu(cpu, mask)
 		ipi_send(cpu, op);
 }
 
@@ -257,7 +258,7 @@ smp_send_all_nop(void)
 	send_IPI_allbutself(IPI_NOP);
 }
 
-void arch_send_call_function_ipi(cpumask_t mask)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 {
 	send_IPI_mask(mask, IPI_CALL_FUNC);
 }
@@ -296,13 +297,14 @@ smp_cpu_init(int cpunum)
 	mb();
 
 	/* Well, support 2.4 linux scheme as well. */
-	if (cpu_test_and_set(cpunum, cpu_online_map))
+	if (cpu_isset(cpunum, cpu_online_map))
 	{
 		extern void machine_halt(void); /* arch/parisc.../process.c */
 
 		printk(KERN_CRIT "CPU#%d already initialized!\n", cpunum);
 		machine_halt();
 	}  
+	set_cpu_online(cpunum, true);
 
 	/* Initialise the idle task for this CPU */
 	atomic_inc(&init_mm.mm_count);
@@ -424,8 +426,8 @@ void __init smp_prepare_boot_cpu(void)
 	/* Setup BSP mappings */
 	printk(KERN_INFO "SMP: bootstrap CPU ID is %d\n", bootstrap_processor);
 
-	cpu_set(bootstrap_processor, cpu_online_map);
-	cpu_set(bootstrap_processor, cpu_present_map);
+	set_cpu_online(bootstrap_processor, true);
+	set_cpu_present(bootstrap_processor, true);
 }
 
 
@@ -436,8 +438,7 @@ void __init smp_prepare_boot_cpu(void)
 */
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
-	cpus_clear(cpu_present_map);
-	cpu_set(0, cpu_present_map);
+	init_cpu_present(cpumask_of(0));
 
 	parisc_max_cpus = max_cpus;
 	if (!max_cpus)
diff --git a/arch/parisc/kernel/stacktrace.c b/arch/parisc/kernel/stacktrace.c
new file mode 100644
index 0000000..2fe914c
--- /dev/null
+++ b/arch/parisc/kernel/stacktrace.c
@@ -0,0 +1,63 @@
+/*
+ * Stack trace management functions
+ *
+ *  Copyright (C) 2009 Helge Deller <deller@gmx.de>
+ *  based on arch/x86/kernel/stacktrace.c by Ingo Molnar <mingo@redhat.com>
+ *  and parisc unwind functions by Randolph Chung <tausq@debian.org>
+ *
+ *  TODO: Userspace stacktrace (CONFIG_USER_STACKTRACE_SUPPORT)
+ */
+#include <linux/module.h>
+#include <linux/stacktrace.h>
+
+#include <asm/unwind.h>
+
+static void dump_trace(struct task_struct *task, struct stack_trace *trace)
+{
+	struct unwind_frame_info info;
+
+	/* initialize unwind info */
+	if (task == current) {
+		unsigned long sp;
+		struct pt_regs r;
+HERE:
+		asm volatile ("copy %%r30, %0" : "=r"(sp));
+		memset(&r, 0, sizeof(struct pt_regs));
+		r.iaoq[0] = (unsigned long)&&HERE;
+		r.gr[2] = (unsigned long)__builtin_return_address(0);
+		r.gr[30] = sp;
+		unwind_frame_init(&info, task, &r);
+	} else {
+		unwind_frame_init_from_blocked_task(&info, task);
+	}
+
+	/* unwind stack and save entries in stack_trace struct */
+	trace->nr_entries = 0;
+	while (trace->nr_entries < trace->max_entries) {
+		if (unwind_once(&info) < 0 || info.ip == 0)
+			break;
+
+		if (__kernel_text_address(info.ip))
+			trace->entries[trace->nr_entries++] = info.ip;
+	}
+}
+
+
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer.
+ */
+void save_stack_trace(struct stack_trace *trace)
+{
+	dump_trace(current, trace);
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+	dump_trace(tsk, trace);
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 69b6eeb..59fc1a4 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -365,17 +365,51 @@ tracesys_sigexit:
 
 
 	/*********************************************************
-		Light-weight-syscall code
+		32/64-bit Light-Weight-Syscall ABI
 
-		r20 - lws number
-		r26,r25,r24,r23,r22 - Input registers
-		r28 - Function return register
-		r21 - Error code.
+		* - Indicates a hint for userspace inline asm
+		implementations.
 
-		Scracth: Any of the above that aren't being
-		currently used, including r1. 
+		Syscall number (caller-saves)
+	        - %r20
+	        * In asm clobber.
 
-		Return pointer: r31 (Not usable)
+		Argument registers (caller-saves)
+	        - %r26, %r25, %r24, %r23, %r22
+	        * In asm input.
+
+		Return registers (caller-saves)
+	        - %r28 (return), %r21 (errno)
+	        * In asm output.
+
+		Caller-saves registers
+	        - %r1, %r27, %r29
+	        - %r2 (return pointer)
+	        - %r31 (ble link register)
+	        * In asm clobber.
+
+		Callee-saves registers
+	        - %r3-%r18
+	        - %r30 (stack pointer)
+	        * Not in asm clobber.
+
+		If userspace is 32-bit:
+		Callee-saves registers
+	        - %r19 (32-bit PIC register)
+
+		Differences from 32-bit calling convention:
+		- Syscall number in %r20
+		- Additional argument register %r22 (arg4)
+		- Callee-saves %r19.
+
+		If userspace is 64-bit:
+		Callee-saves registers
+		- %r27 (64-bit PIC register)
+
+		Differences from 64-bit calling convention:
+		- Syscall number in %r20
+		- Additional argument register %r22 (arg4)
+		- Callee-saves %r27.
 
 		Error codes returned by entry path:
 
@@ -473,7 +507,8 @@ lws_compare_and_swap64:
 	b,n	lws_compare_and_swap
 #else
 	/* If we are not a 64-bit kernel, then we don't
-	 * implement having 64-bit input registers
+	 * have 64-bit input registers, and calling
+	 * the 64-bit LWS CAS returns ENOSYS.
 	 */
 	b,n	lws_exit_nosys
 #endif
@@ -635,12 +670,15 @@ END(sys_call_table64)
 	/*
 		All light-weight-syscall atomic operations 
 		will use this set of locks 
+
+		NOTE: The lws_lock_start symbol must be
+		at least 16-byte aligned for safe use
+		with ldcw.
 	*/
 	.section .data
 	.align	PAGE_SIZE
 ENTRY(lws_lock_start)
 	/* lws locks */
-	.align 16
 	.rept 16
 	/* Keep locks aligned at 16-bytes */
 	.word 1
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index e75cae6..d4dd056 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -24,6 +24,7 @@
 #include <linux/profile.h>
 #include <linux/clocksource.h>
 #include <linux/platform_device.h>
+#include <linux/ftrace.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -53,7 +54,7 @@ static unsigned long clocktick __read_mostly;	/* timer cycles per tick */
  * held off for an arbitrarily long period of time by interrupts being
  * disabled, so we may miss one or more ticks.
  */
-irqreturn_t timer_interrupt(int irq, void *dev_id)
+irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id)
 {
 	unsigned long now;
 	unsigned long next_tick;
@@ -216,14 +217,14 @@ void __init start_cpu_itimer(void)
 	per_cpu(cpu_data, cpu).it_value = next_tick;
 }
 
-static struct platform_device rtc_parisc_dev = {
-	.name = "rtc-parisc",
+static struct platform_device rtc_generic_dev = {
+	.name = "rtc-generic",
 	.id = -1,
 };
 
 static int __init rtc_init(void)
 {
-	if (platform_device_register(&rtc_parisc_dev) < 0)
+	if (platform_device_register(&rtc_generic_dev) < 0)
 		printk(KERN_ERR "unable to register rtc device...\n");
 
 	/* not necessarily an error */
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index ba658d2..c32f5d6 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -247,6 +247,8 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err)
 
 	oops_in_progress = 1;
 
+	oops_enter();
+
 	/* Amuse the user in a SPARC fashion */
 	if (err) printk(
 KERN_CRIT "      _______________________________ \n"
@@ -293,6 +295,7 @@ KERN_CRIT "                     ||     ||\n");
 		panic("Fatal exception");
 	}
 
+	oops_exit();
 	do_exit(SIGSEGV);
 }
 
@@ -494,7 +497,7 @@ void parisc_terminate(char *msg, struct pt_regs *regs, int code, unsigned long o
 	panic(msg);
 }
 
-void handle_interruption(int code, struct pt_regs *regs)
+void notrace handle_interruption(int code, struct pt_regs *regs)
 {
 	unsigned long fault_address = 0;
 	unsigned long fault_space = 0;
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index 1a3b6cc..fd2cc4f 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -54,6 +54,8 @@ SECTIONS
 		TEXT_TEXT
 		SCHED_TEXT
 		LOCK_TEXT
+		KPROBES_TEXT
+		IRQENTRY_TEXT
 		*(.text.do_softirq)
 		*(.text.sys_exit)
 		*(.text.do_sigaltstack)
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 9d704d9..4356ceb 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -456,6 +456,13 @@ void __init mem_init(void)
 {
 	int codesize, reservedpages, datasize, initsize;
 
+	/* Do sanity checks on page table constants */
+	BUILD_BUG_ON(PTE_ENTRY_SIZE != sizeof(pte_t));
+	BUILD_BUG_ON(PMD_ENTRY_SIZE != sizeof(pmd_t));
+	BUILD_BUG_ON(PGD_ENTRY_SIZE != sizeof(pgd_t));
+	BUILD_BUG_ON(PAGE_SHIFT + BITS_PER_PTE + BITS_PER_PMD + BITS_PER_PGD
+			> BITS_PER_LONG);
+
 	high_memory = __va((max_pfn << PAGE_SHIFT));
 
 #ifndef CONFIG_DISCONTIGMEM
diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h
index 67f1812..cdb6fd8 100644
--- a/arch/powerpc/include/asm/ps3.h
+++ b/arch/powerpc/include/asm/ps3.h
@@ -50,6 +50,9 @@ enum ps3_param_av_multi_out {
 
 enum ps3_param_av_multi_out ps3_os_area_get_av_multi_out(void);
 
+extern u64 ps3_os_area_get_rtc_diff(void);
+extern void ps3_os_area_set_rtc_diff(u64 rtc_diff);
+
 /* dma routines */
 
 enum ps3_dma_page_size {
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index c956403..926ea86 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -1127,3 +1127,19 @@ void div128_by_32(u64 dividend_high, u64 dividend_low,
 	dr->result_low  = ((u64)y << 32) + z;
 
 }
+
+static int __init rtc_init(void)
+{
+	struct platform_device *pdev;
+
+	if (!ppc_md.get_rtc_time)
+		return -ENODEV;
+
+	pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	return 0;
+}
+
+module_init(rtc_init);
diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c
index e1c83c2..86e392b1 100644
--- a/arch/powerpc/platforms/ps3/os-area.c
+++ b/arch/powerpc/platforms/ps3/os-area.c
@@ -808,6 +808,7 @@ u64 ps3_os_area_get_rtc_diff(void)
 {
 	return saved_params.rtc_diff;
 }
+EXPORT_SYMBOL(ps3_os_area_get_rtc_diff);
 
 /**
  * ps3_os_area_set_rtc_diff - Set the rtc diff value.
@@ -823,6 +824,7 @@ void ps3_os_area_set_rtc_diff(u64 rtc_diff)
 		os_area_queue_work();
 	}
 }
+EXPORT_SYMBOL(ps3_os_area_set_rtc_diff);
 
 /**
  * ps3_os_area_get_av_multi_out - Returns the default video mode.
diff --git a/arch/powerpc/platforms/ps3/platform.h b/arch/powerpc/platforms/ps3/platform.h
index 235c13e..136aa06 100644
--- a/arch/powerpc/platforms/ps3/platform.h
+++ b/arch/powerpc/platforms/ps3/platform.h
@@ -64,8 +64,6 @@ int ps3_set_rtc_time(struct rtc_time *time);
 
 void __init ps3_os_area_save_params(void);
 void __init ps3_os_area_init(void);
-u64 ps3_os_area_get_rtc_diff(void);
-void ps3_os_area_set_rtc_diff(u64 rtc_diff);
 
 /* spu */
 
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index 3331ccb..6618182 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -270,8 +270,6 @@ define_machine(ps3) {
 	.init_IRQ			= ps3_init_IRQ,
 	.panic				= ps3_panic,
 	.get_boot_time			= ps3_get_boot_time,
-	.set_rtc_time			= ps3_set_rtc_time,
-	.get_rtc_time			= ps3_get_rtc_time,
 	.set_dabr			= ps3_set_dabr,
 	.calibrate_decr			= ps3_calibrate_decr,
 	.progress			= ps3_progress,
diff --git a/arch/powerpc/platforms/ps3/time.c b/arch/powerpc/platforms/ps3/time.c
index d0daf7d..b178a1e 100644
--- a/arch/powerpc/platforms/ps3/time.c
+++ b/arch/powerpc/platforms/ps3/time.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/platform_device.h>
 
 #include <asm/rtc.h>
 #include <asm/lv1call.h>
@@ -74,23 +75,20 @@ static u64 read_rtc(void)
 	return rtc_val;
 }
 
-int ps3_set_rtc_time(struct rtc_time *tm)
+unsigned long __init ps3_get_boot_time(void)
 {
-	u64 now = mktime(tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
-		tm->tm_hour, tm->tm_min, tm->tm_sec);
-
-	ps3_os_area_set_rtc_diff(now - read_rtc());
-	return 0;
+	return read_rtc() + ps3_os_area_get_rtc_diff();
 }
 
-void ps3_get_rtc_time(struct rtc_time *tm)
+static int __init ps3_rtc_init(void)
 {
-	to_tm(read_rtc() + ps3_os_area_get_rtc_diff(), tm);
-	tm->tm_year -= 1900;
-	tm->tm_mon -= 1;
-}
+	struct platform_device *pdev;
 
-unsigned long __init ps3_get_boot_time(void)
-{
-	return read_rtc() + ps3_os_area_get_rtc_diff();
+	pdev = platform_device_register_simple("rtc-ps3", -1, NULL, 0);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	return 0;
 }
+
+module_init(ps3_rtc_init);
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index d42f826..f934225 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -22,6 +22,7 @@
 #include "linux/kernel.h"
 #include "linux/module.h"
 #include "linux/blkdev.h"
+#include "linux/ata.h"
 #include "linux/hdreg.h"
 #include "linux/init.h"
 #include "linux/cdrom.h"
@@ -1308,16 +1309,15 @@ static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
 		     unsigned int cmd, unsigned long arg)
 {
 	struct ubd *ubd_dev = bdev->bd_disk->private_data;
-	struct hd_driveid ubd_id = {
-		.cyls		= 0,
-		.heads		= 128,
-		.sectors	= 32,
-	};
+	u16 ubd_id[ATA_ID_WORDS];
 
 	switch (cmd) {
 		struct cdrom_volctrl volume;
 	case HDIO_GET_IDENTITY:
-		ubd_id.cyls = ubd_dev->size / (128 * 32 * 512);
+		memset(&ubd_id, 0, ATA_ID_WORDS * 2);
+		ubd_id[ATA_ID_CYLS]	= ubd_dev->size / (128 * 32 * 512);
+		ubd_id[ATA_ID_HEADS]	= 128;
+		ubd_id[ATA_ID_SECTORS]	= 32;
 		if(copy_to_user((char __user *) arg, (char *) &ubd_id,
 				 sizeof(ubd_id)))
 			return -EFAULT;
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 5bc5d16..8126e8d 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -40,7 +40,6 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
 
 	debug_kmap_atomic(type);
 
-	debug_kmap_atomic(type);
 	idx = type + KM_TYPE_NR*smp_processor_id();
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 	BUG_ON(!pte_none(*(kmap_pte-idx)));
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index bff0c90..e331f77 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -39,6 +39,7 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
 
 	pagefault_disable();
 
+	debug_kmap_atomic(type);
 	idx = type + KM_TYPE_NR * smp_processor_id();
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 	set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
@@ -72,7 +73,6 @@ iounmap_atomic(void *kvaddr, enum km_type type)
 	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
 	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
 
-	debug_kmap_atomic(type);
 	/*
 	 * Force other mappings to Oops if they'll try to access this pte
 	 * without first remap it.  Keeping stale mappings around is a bad idea
diff --git a/crypto/shash.c b/crypto/shash.c
index 7a65973..2ccc8b0 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -77,6 +77,9 @@ static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
 	u8 buf[shash_align_buffer_size(unaligned_len, alignmask)]
 		__attribute__ ((aligned));
 
+	if (unaligned_len > len)
+		unaligned_len = len;
+
 	memcpy(buf, data, unaligned_len);
 
 	return shash->update(desc, buf, unaligned_len) ?:
diff --git a/crypto/xor.c b/crypto/xor.c
index b2e6db0..996b6ee 100644
--- a/crypto/xor.c
+++ b/crypto/xor.c
@@ -18,8 +18,8 @@
 
 #define BH_TRACE 0
 #include <linux/module.h>
-#include <linux/raid/md.h>
 #include <linux/raid/xor.h>
+#include <linux/jiffies.h>
 #include <asm/xor.h>
 
 /* The xor routines to use.  */
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 45c5a33..31693bc 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -4,6 +4,7 @@
  * Filesystem request handling methods
  */
 
+#include <linux/ata.h>
 #include <linux/hdreg.h>
 #include <linux/blkdev.h>
 #include <linux/skbuff.h>
@@ -267,7 +268,7 @@ aoecmd_ata_rw(struct aoedev *d)
 		writebit = 0;
 	}
 
-	ah->cmdstat = WIN_READ | writebit | extbit;
+	ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
 
 	/* mark all tracking fields and load out */
 	buf->nframesout += 1;
@@ -362,10 +363,10 @@ resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
 	switch (ah->cmdstat) {
 	default:
 		break;
-	case WIN_READ:
-	case WIN_READ_EXT:
-	case WIN_WRITE:
-	case WIN_WRITE_EXT:
+	case ATA_CMD_PIO_READ:
+	case ATA_CMD_PIO_READ_EXT:
+	case ATA_CMD_PIO_WRITE:
+	case ATA_CMD_PIO_WRITE_EXT:
 		put_lba(ah, f->lba);
 
 		n = f->bcnt;
@@ -812,8 +813,8 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 			d->htgt = NULL;
 		n = ahout->scnt << 9;
 		switch (ahout->cmdstat) {
-		case WIN_READ:
-		case WIN_READ_EXT:
+		case ATA_CMD_PIO_READ:
+		case ATA_CMD_PIO_READ_EXT:
 			if (skb->len - sizeof *hin - sizeof *ahin < n) {
 				printk(KERN_ERR
 					"aoe: %s.  skb->len=%d need=%ld\n",
@@ -823,8 +824,8 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 				return;
 			}
 			memcpy(f->bufaddr, ahin+1, n);
-		case WIN_WRITE:
-		case WIN_WRITE_EXT:
+		case ATA_CMD_PIO_WRITE:
+		case ATA_CMD_PIO_WRITE_EXT:
 			ifp = getif(t, skb->dev);
 			if (ifp) {
 				ifp->lost = 0;
@@ -838,7 +839,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 				goto xmit;
 			}
 			break;
-		case WIN_IDENTIFY:
+		case ATA_CMD_ID_ATA:
 			if (skb->len - sizeof *hin - sizeof *ahin < 512) {
 				printk(KERN_INFO
 					"aoe: runt data size in ataid.  skb->len=%d\n",
@@ -914,7 +915,7 @@ aoecmd_ata_id(struct aoedev *d)
 
 	/* set up ata header */
 	ah->scnt = 1;
-	ah->cmdstat = WIN_IDENTIFY;
+	ah->cmdstat = ATA_CMD_ID_ATA;
 	ah->lba3 = 0xa0;
 
 	skb->dev = t->ifp->nd;
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index 482c0c4..3c11f06 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -42,6 +42,8 @@
 #include <linux/ata.h>
 #include <linux/hdreg.h>
 
+#define HD_IRQ 14
+
 #define REALLY_SLOW_IO
 #include <asm/system.h>
 #include <asm/io.h>
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 119be34..6cccdc3 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -89,6 +89,7 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
+#include <linux/ata.h>
 #include <linux/hdreg.h>
 #include <linux/platform_device.h>
 #if defined(CONFIG_OF)
@@ -208,7 +209,7 @@ struct ace_device {
 	struct gendisk *gd;
 
 	/* Inserted CF card parameters */
-	struct hd_driveid cf_id;
+	u16 cf_id[ATA_ID_WORDS];
 };
 
 static int ace_major;
@@ -402,21 +403,14 @@ static void ace_dump_regs(struct ace_device *ace)
 		 ace_in32(ace, ACE_CFGLBA), ace_in(ace, ACE_FATSTAT));
 }
 
-void ace_fix_driveid(struct hd_driveid *id)
+void ace_fix_driveid(u16 *id)
 {
 #if defined(__BIG_ENDIAN)
-	u16 *buf = (void *)id;
 	int i;
 
 	/* All half words have wrong byte order; swap the bytes */
-	for (i = 0; i < sizeof(struct hd_driveid); i += 2, buf++)
-		*buf = le16_to_cpu(*buf);
-
-	/* Some of the data values are 32bit; swap the half words  */
-	id->lba_capacity = ((id->lba_capacity >> 16) & 0x0000FFFF) |
-	    ((id->lba_capacity << 16) & 0xFFFF0000);
-	id->spg = ((id->spg >> 16) & 0x0000FFFF) |
-	    ((id->spg << 16) & 0xFFFF0000);
+	for (i = 0; i < ATA_ID_WORDS; i++, id++)
+		*id = le16_to_cpu(*id);
 #endif
 }
 
@@ -614,7 +608,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
 		break;
 
 	case ACE_FSM_STATE_IDENTIFY_COMPLETE:
-		ace_fix_driveid(&ace->cf_id);
+		ace_fix_driveid(&ace->cf_id[0]);
 		ace_dump_mem(&ace->cf_id, 512);	/* Debug: Dump out disk ID */
 
 		if (ace->data_result) {
@@ -627,9 +621,10 @@ static void ace_fsm_dostate(struct ace_device *ace)
 			ace->media_change = 0;
 
 			/* Record disk parameters */
-			set_capacity(ace->gd, ace->cf_id.lba_capacity);
+			set_capacity(ace->gd,
+				ata_id_u32(ace->cf_id, ATA_ID_LBA_CAPACITY));
 			dev_info(ace->dev, "capacity: %i sectors\n",
-				 ace->cf_id.lba_capacity);
+				ata_id_u32(ace->cf_id, ATA_ID_LBA_CAPACITY));
 		}
 
 		/* We're done, drop to IDLE state and notify waiters */
 
 		/* We're done, drop to IDLE state and notify waiters */
@@ -928,12 +923,13 @@ static int ace_release(struct gendisk *disk, fmode_t mode)
 static int ace_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
 	struct ace_device *ace = bdev->bd_disk->private_data;
+	u16 *cf_id = &ace->cf_id[0];
 
 	dev_dbg(ace->dev, "ace_getgeo()\n");
 
-	geo->heads = ace->cf_id.heads;
-	geo->sectors = ace->cf_id.sectors;
-	geo->cylinders = ace->cf_id.cyls;
+	geo->heads	= cf_id[ATA_ID_HEADS];
+	geo->sectors	= cf_id[ATA_ID_SECTORS];
+	geo->cylinders	= cf_id[ATA_ID_CYLS];
 
 	return 0;
 }
diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c
index 10ad41b..dcd352a 100644
--- a/drivers/char/hw_random/timeriomem-rng.c
+++ b/drivers/char/hw_random/timeriomem-rng.c
@@ -90,10 +90,30 @@ static struct hwrng timeriomem_rng_ops = {
 
 static int __init timeriomem_rng_probe(struct platform_device *pdev)
 {
+	struct resource *res, *mem;
 	int ret;
 
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	if (!res)
+		return -ENOENT;
+
+	mem = request_mem_region(res->start, res->end - res->start + 1,
+				 pdev->name);
+	if (mem == NULL)
+		return -EBUSY;
+
+	dev_set_drvdata(&pdev->dev, mem);
+
 	timeriomem_rng_data = pdev->dev.platform_data;
 
+	timeriomem_rng_data->address = ioremap(res->start,
+						res->end - res->start + 1);
+	if (!timeriomem_rng_data->address) {
+		ret = -ENOMEM;
+		goto err_ioremap;
+	}
+
 	if (timeriomem_rng_data->period != 0
 		&& usecs_to_jiffies(timeriomem_rng_data->period) > 0) {
 		timeriomem_rng_timer.expires = jiffies;
@@ -104,23 +124,34 @@ static int __init timeriomem_rng_probe(struct platform_device *pdev)
 	timeriomem_rng_data->present = 1;
 
 	ret = hwrng_register(&timeriomem_rng_ops);
-	if (ret) {
-		dev_err(&pdev->dev, "problem registering\n");
-		return ret;
-	}
+	if (ret)
+		goto err_register;
 
 	dev_info(&pdev->dev, "32bits from 0x%p @ %dus\n",
 			timeriomem_rng_data->address,
 			timeriomem_rng_data->period);
 
 	return 0;
+
+err_register:
+	dev_err(&pdev->dev, "problem registering\n");
+	iounmap(timeriomem_rng_data->address);
+err_ioremap:
+	release_mem_region(res->start, res->end - res->start + 1);
+
+	return ret;
 }
 
 static int __devexit timeriomem_rng_remove(struct platform_device *pdev)
 {
+	struct resource *mem = dev_get_drvdata(&pdev->dev);
+
 	del_timer_sync(&timeriomem_rng_timer);
 	hwrng_unregister(&timeriomem_rng_ops);
 
+	iounmap(timeriomem_rng_data->address);
+	release_mem_region(mem->start, mem->end - mem->start + 1);
+
 	return 0;
 }
 
diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index d9e751b..af9761c 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -101,6 +101,7 @@ struct buffer_desc {
 	u32 phys_addr;
 	u32 __reserved[4];
 	struct buffer_desc *next;
+	enum dma_data_direction dir;
 };
 
 struct crypt_ctl {
@@ -132,14 +133,10 @@ struct crypt_ctl {
 struct ablk_ctx {
 	struct buffer_desc *src;
 	struct buffer_desc *dst;
-	unsigned src_nents;
-	unsigned dst_nents;
 };
 
 struct aead_ctx {
 	struct buffer_desc *buffer;
-	unsigned short assoc_nents;
-	unsigned short src_nents;
 	struct scatterlist ivlist;
 	/* used when the hmac is not on one sg entry */
 	u8 *hmac_virt;
@@ -312,7 +309,7 @@ static struct crypt_ctl *get_crypt_desc_emerg(void)
 	}
 }
 
-static void free_buf_chain(struct buffer_desc *buf, u32 phys)
+static void free_buf_chain(struct device *dev, struct buffer_desc *buf,u32 phys)
 {
 	while (buf) {
 		struct buffer_desc *buf1;
@@ -320,6 +317,7 @@ static void free_buf_chain(struct buffer_desc *buf, u32 phys)
 
 		buf1 = buf->next;
 		phys1 = buf->phys_next;
+		dma_unmap_single(dev, buf->phys_addr, buf->buf_len, buf->dir);
 		dma_pool_free(buffer_pool, buf, phys);
 		buf = buf1;
 		phys = phys1;
@@ -348,7 +346,6 @@ static void one_packet(dma_addr_t phys)
 	struct crypt_ctl *crypt;
 	struct ixp_ctx *ctx;
 	int failed;
-	enum dma_data_direction src_direction = DMA_BIDIRECTIONAL;
 
 	failed = phys & 0x1 ? -EBADMSG : 0;
 	phys &= ~0x3;
@@ -358,13 +355,8 @@ static void one_packet(dma_addr_t phys)
 	case CTL_FLAG_PERFORM_AEAD: {
 		struct aead_request *req = crypt->data.aead_req;
 		struct aead_ctx *req_ctx = aead_request_ctx(req);
-		dma_unmap_sg(dev, req->assoc, req_ctx->assoc_nents,
-				DMA_TO_DEVICE);
-		dma_unmap_sg(dev, &req_ctx->ivlist, 1, DMA_BIDIRECTIONAL);
-		dma_unmap_sg(dev, req->src, req_ctx->src_nents,
-				DMA_BIDIRECTIONAL);
 
-		free_buf_chain(req_ctx->buffer, crypt->src_buf);
+		free_buf_chain(dev, req_ctx->buffer, crypt->src_buf);
 		if (req_ctx->hmac_virt) {
 			finish_scattered_hmac(crypt);
 		}
@@ -374,16 +366,11 @@ static void one_packet(dma_addr_t phys)
 	case CTL_FLAG_PERFORM_ABLK: {
 		struct ablkcipher_request *req = crypt->data.ablk_req;
 		struct ablk_ctx *req_ctx = ablkcipher_request_ctx(req);
-		int nents;
+
 		if (req_ctx->dst) {
-			nents = req_ctx->dst_nents;
-			dma_unmap_sg(dev, req->dst, nents, DMA_FROM_DEVICE);
-			free_buf_chain(req_ctx->dst, crypt->dst_buf);
-			src_direction = DMA_TO_DEVICE;
+			free_buf_chain(dev, req_ctx->dst, crypt->dst_buf);
 		}
-		nents = req_ctx->src_nents;
-		dma_unmap_sg(dev, req->src, nents, src_direction);
-		free_buf_chain(req_ctx->src, crypt->src_buf);
+		free_buf_chain(dev, req_ctx->src, crypt->src_buf);
 		req->base.complete(&req->base, failed);
 		break;
 	}
@@ -750,56 +737,35 @@ static int setup_cipher(struct crypto_tfm *tfm, int encrypt,
 	return 0;
 }
 
-static int count_sg(struct scatterlist *sg, int nbytes)
+static struct buffer_desc *chainup_buffers(struct device *dev,
+		struct scatterlist *sg,	unsigned nbytes,
+		struct buffer_desc *buf, gfp_t flags,
+		enum dma_data_direction dir)
 {
-	int i;
-	for (i = 0; nbytes > 0; i++, sg = sg_next(sg))
-		nbytes -= sg->length;
-	return i;
-}
-
-static struct buffer_desc *chainup_buffers(struct scatterlist *sg,
-			unsigned nbytes, struct buffer_desc *buf, gfp_t flags)
-{
-	int nents = 0;
-
-	while (nbytes > 0) {
+	for (;nbytes > 0; sg = scatterwalk_sg_next(sg)) {
+		unsigned len = min(nbytes, sg->length);
 		struct buffer_desc *next_buf;
 		u32 next_buf_phys;
-		unsigned len = min(nbytes, sg_dma_len(sg));
+		void *ptr;
 
-		nents++;
 		nbytes -= len;
-		if (!buf->phys_addr) {
-			buf->phys_addr = sg_dma_address(sg);
-			buf->buf_len = len;
-			buf->next = NULL;
-			buf->phys_next = 0;
-			goto next;
-		}
-		/* Two consecutive chunks on one page may be handled by the old
-		 * buffer descriptor, increased by the length of the new one
-		 */
-		if (sg_dma_address(sg) == buf->phys_addr + buf->buf_len) {
-			buf->buf_len += len;
-			goto next;
-		}
+		ptr = page_address(sg_page(sg)) + sg->offset;
 		next_buf = dma_pool_alloc(buffer_pool, flags, &next_buf_phys);
-		if (!next_buf)
-			return NULL;
+		if (!next_buf) {
+			buf->next = NULL;
+			buf->phys_next = 0;
+			return NULL;
+		}
+		sg_dma_address(sg) = dma_map_single(dev, ptr, len, dir);
 		buf->next = next_buf;
 		buf->phys_next = next_buf_phys;
-
 		buf = next_buf;
-		buf->next = NULL;
-		buf->phys_next = 0;
 		buf->phys_addr = sg_dma_address(sg);
 		buf->buf_len = len;
-next:
-		if (nbytes > 0) {
-			sg = sg_next(sg);
-		}
+		buf->dir = dir;
 	}
+	buf->next = NULL;
+	buf->phys_next = 0;
 	return buf;
 }
 
@@ -860,12 +826,12 @@ static int ablk_perform(struct ablkcipher_request *req, int encrypt)
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
 	struct ixp_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 	unsigned ivsize = crypto_ablkcipher_ivsize(tfm);
-	int ret = -ENOMEM;
 	struct ix_sa_dir *dir;
 	struct crypt_ctl *crypt;
-	unsigned int nbytes = req->nbytes, nents;
+	unsigned int nbytes = req->nbytes;
 	enum dma_data_direction src_direction = DMA_BIDIRECTIONAL;
 	struct ablk_ctx *req_ctx = ablkcipher_request_ctx(req);
+	struct buffer_desc src_hook;
 	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
 				GFP_KERNEL : GFP_ATOMIC;
 
@@ -878,7 +844,7 @@ static int ablk_perform(struct ablkcipher_request *req, int encrypt)
 
 	crypt = get_crypt_desc();
 	if (!crypt)
-		return ret;
+		return -ENOMEM;
 
 	crypt->data.ablk_req = req;
 	crypt->crypto_ctx = dir->npe_ctx_phys;
@@ -891,53 +857,41 @@ static int ablk_perform(struct ablkcipher_request *req, int encrypt)
 	BUG_ON(ivsize && !req->info);
 	memcpy(crypt->iv, req->info, ivsize);
 	if (req->src != req->dst) {
+		struct buffer_desc dst_hook;
 		crypt->mode |= NPE_OP_NOT_IN_PLACE;
-		nents = count_sg(req->dst, nbytes);
 		/* This was never tested by Intel
 		 * for more than one dst buffer, I think. */
-		BUG_ON(nents != 1);
-		req_ctx->dst_nents = nents;
-		dma_map_sg(dev, req->dst, nents, DMA_FROM_DEVICE);
-		req_ctx->dst = dma_pool_alloc(buffer_pool, flags,&crypt->dst_buf);
-		if (!req_ctx->dst)
-			goto unmap_sg_dest;
-		req_ctx->dst->phys_addr = 0;
-		if (!chainup_buffers(req->dst, nbytes, req_ctx->dst, flags))
+		BUG_ON(req->dst->length < nbytes);
+		req_ctx->dst = NULL;
+		if (!chainup_buffers(dev, req->dst, nbytes, &dst_hook,
+					flags, DMA_FROM_DEVICE))
 			goto free_buf_dest;
 		src_direction = DMA_TO_DEVICE;
+		req_ctx->dst = dst_hook.next;
+		crypt->dst_buf = dst_hook.phys_next;
 	} else {
 		req_ctx->dst = NULL;
-		req_ctx->dst_nents = 0;
 	}
-	nents = count_sg(req->src, nbytes);
-	req_ctx->src_nents = nents;
-	dma_map_sg(dev, req->src, nents, src_direction);
-
-	req_ctx->src = dma_pool_alloc(buffer_pool, flags, &crypt->src_buf);
-	if (!req_ctx->src)
-		goto unmap_sg_src;
-	req_ctx->src->phys_addr = 0;
-	if (!chainup_buffers(req->src, nbytes, req_ctx->src, flags))
+	req_ctx->src = NULL;
+	if (!chainup_buffers(dev, req->src, nbytes, &src_hook,
+				flags, src_direction))
 		goto free_buf_src;
 
+	req_ctx->src = src_hook.next;
+	crypt->src_buf = src_hook.phys_next;
 	crypt->ctl_flags |= CTL_FLAG_PERFORM_ABLK;
 	qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt));
 	BUG_ON(qmgr_stat_overflow(SEND_QID));
 	return -EINPROGRESS;
 
 free_buf_src:
-	free_buf_chain(req_ctx->src, crypt->src_buf);
-unmap_sg_src:
-	dma_unmap_sg(dev, req->src, req_ctx->src_nents, src_direction);
+	free_buf_chain(dev, req_ctx->src, crypt->src_buf);
 free_buf_dest:
 	if (req->src != req->dst) {
-		free_buf_chain(req_ctx->dst, crypt->dst_buf);
-unmap_sg_dest:
-		dma_unmap_sg(dev, req->src, req_ctx->dst_nents,
-			DMA_FROM_DEVICE);
+		free_buf_chain(dev, req_ctx->dst, crypt->dst_buf);
 	}
 	crypt->ctl_flags = CTL_FLAG_UNUSED;
-	return ret;
+	return -ENOMEM;
 }
 
 static int ablk_encrypt(struct ablkcipher_request *req)
@@ -985,7 +939,7 @@ static int hmac_inconsistent(struct scatterlist *sg, unsigned start,
 			break;
 
 		offset += sg->length;
-		sg = sg_next(sg);
+		sg = scatterwalk_sg_next(sg);
 	}
 	return (start + nbytes > offset + sg->length);
 }
@@ -997,11 +951,10 @@ static int aead_perform(struct aead_request *req, int encrypt,
 	struct ixp_ctx *ctx = crypto_aead_ctx(tfm);
 	unsigned ivsize = crypto_aead_ivsize(tfm);
 	unsigned authsize = crypto_aead_authsize(tfm);
-	int ret = -ENOMEM;
 	struct ix_sa_dir *dir;
 	struct crypt_ctl *crypt;
-	unsigned int cryptlen, nents;
-	struct buffer_desc *buf;
+	unsigned int cryptlen;
+	struct buffer_desc *buf, src_hook;
 	struct aead_ctx *req_ctx = aead_request_ctx(req);
 	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
 				GFP_KERNEL : GFP_ATOMIC;
@@ -1022,7 +975,7 @@ static int aead_perform(struct aead_request *req, int encrypt,
 	}
 	crypt = get_crypt_desc();
 	if (!crypt)
-		return ret;
+		return -ENOMEM;
 
 	crypt->data.aead_req = req;
 	crypt->crypto_ctx = dir->npe_ctx_phys;
@@ -1041,31 +994,27 @@ static int aead_perform(struct aead_request *req, int encrypt,
 		BUG(); /* -ENOTSUP because of my lazyness */
 	}
 
-	req_ctx->buffer = dma_pool_alloc(buffer_pool, flags, &crypt->src_buf);
-	if (!req_ctx->buffer)
-		goto out;
-	req_ctx->buffer->phys_addr = 0;
 	/* ASSOC data */
-	nents = count_sg(req->assoc, req->assoclen);
-	req_ctx->assoc_nents = nents;
-	dma_map_sg(dev, req->assoc, nents, DMA_TO_DEVICE);
-	buf = chainup_buffers(req->assoc, req->assoclen, req_ctx->buffer,flags);
+	buf = chainup_buffers(dev, req->assoc, req->assoclen, &src_hook,
+		flags, DMA_TO_DEVICE);
+	req_ctx->buffer = src_hook.next;
+	crypt->src_buf = src_hook.phys_next;
 	if (!buf)
-		goto unmap_sg_assoc;
+		goto out;
 	/* IV */
 	sg_init_table(&req_ctx->ivlist, 1);
 	sg_set_buf(&req_ctx->ivlist, iv, ivsize);
-	dma_map_sg(dev, &req_ctx->ivlist, 1, DMA_BIDIRECTIONAL);
-	buf = chainup_buffers(&req_ctx->ivlist, ivsize, buf, flags);
+	buf = chainup_buffers(dev, &req_ctx->ivlist, ivsize, buf, flags,
+			DMA_BIDIRECTIONAL);
 	if (!buf)
-		goto unmap_sg_iv;
+		goto free_chain;
 	if (unlikely(hmac_inconsistent(req->src, cryptlen, authsize))) {
 		/* The 12 hmac bytes are scattered,
 		 * we need to copy them into a safe buffer */
 		req_ctx->hmac_virt = dma_pool_alloc(buffer_pool, flags,
 				&crypt->icv_rev_aes);
 		if (unlikely(!req_ctx->hmac_virt))
-			goto unmap_sg_iv;
+			goto free_chain;
 		if (!encrypt) {
 			scatterwalk_map_and_copy(req_ctx->hmac_virt,
 				req->src, cryptlen, authsize, 0);
@@ -1075,33 +1024,28 @@ static int aead_perform(struct aead_request *req, int encrypt,
 		req_ctx->hmac_virt = NULL;
 	}
 	/* Crypt */
-	nents = count_sg(req->src, cryptlen + authsize);
-	req_ctx->src_nents = nents;
-	dma_map_sg(dev, req->src, nents, DMA_BIDIRECTIONAL);
-	buf = chainup_buffers(req->src, cryptlen + authsize, buf, flags);
+	buf = chainup_buffers(dev, req->src, cryptlen + authsize, buf, flags,
+			DMA_BIDIRECTIONAL);
 	if (!buf)
-		goto unmap_sg_src;
+		goto free_hmac_virt;
 	if (!req_ctx->hmac_virt) {
 		crypt->icv_rev_aes = buf->phys_addr + buf->buf_len - authsize;
 	}
+
 	crypt->ctl_flags |= CTL_FLAG_PERFORM_AEAD;
 	qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt));
 	BUG_ON(qmgr_stat_overflow(SEND_QID));
 	return -EINPROGRESS;
-unmap_sg_src:
-	dma_unmap_sg(dev, req->src, req_ctx->src_nents, DMA_BIDIRECTIONAL);
+free_hmac_virt:
 	if (req_ctx->hmac_virt) {
 		dma_pool_free(buffer_pool, req_ctx->hmac_virt,
 				crypt->icv_rev_aes);
 	}
-unmap_sg_iv:
-	dma_unmap_sg(dev, &req_ctx->ivlist, 1, DMA_BIDIRECTIONAL);
-unmap_sg_assoc:
-	dma_unmap_sg(dev, req->assoc, req_ctx->assoc_nents, DMA_TO_DEVICE);
-	free_buf_chain(req_ctx->buffer, crypt->src_buf);
+free_chain:
+	free_buf_chain(dev, req_ctx->buffer, crypt->src_buf);
 out:
 	crypt->ctl_flags = CTL_FLAG_UNUSED;
-	return ret;
+	return -ENOMEM;
 }
 
 static int aead_setup(struct crypto_aead *tfm, unsigned int authsize)
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 2281b50..36e0675 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -121,6 +121,7 @@ config MD_RAID10
 config MD_RAID456
 	tristate "RAID-4/RAID-5/RAID-6 mode"
 	depends on BLK_DEV_MD
+	select MD_RAID6_PQ
 	select ASYNC_MEMCPY
 	select ASYNC_XOR
 	---help---
@@ -151,34 +152,8 @@ config MD_RAID456
 
 	  If unsure, say Y.
 
-config MD_RAID5_RESHAPE
-	bool "Support adding drives to a raid-5 array"
-	depends on MD_RAID456
-	default y
-	---help---
-	  A RAID-5 set can be expanded by adding extra drives. This
-	  requires "restriping" the array which means (almost) every
-	  block must be written to a different place.
-
-          This option allows such restriping to be done while the array
-	  is online.
-
-	  You will need mdadm version 2.4.1 or later to use this
-	  feature safely.  During the early stage of reshape there is
-	  a critical section where live data is being over-written.  A
-	  crash during this time needs extra care for recovery.  The
-	  newer mdadm takes a copy of the data in the critical section
-	  and will restore it, if necessary, after a crash.
-
-	  The mdadm usage is e.g.
-	       mdadm --grow /dev/md1 --raid-disks=6
-	  to grow '/dev/md1' to having 6 disks.
-
-	  Note: The array can only be expanded, not contracted.
-	  There should be enough spares already present to make the new
-	  array workable.
-
-	  If unsure, say Y.
+config MD_RAID6_PQ
+	tristate
 
 config MD_MULTIPATH
 	tristate "Multipath I/O support"
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 72880b7..45cc595 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -2,20 +2,21 @@
 # Makefile for the kernel software RAID and LVM drivers.
 #
 
-dm-mod-objs	:= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
+dm-mod-y	+= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
 		   dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o
-dm-multipath-objs := dm-path-selector.o dm-mpath.o
-dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-snap-transient.o \
+dm-multipath-y	+= dm-path-selector.o dm-mpath.o
+dm-snapshot-y	+= dm-snap.o dm-exception-store.o dm-snap-transient.o \
 		    dm-snap-persistent.o
-dm-mirror-objs	:= dm-raid1.o
-md-mod-objs     := md.o bitmap.o
-raid456-objs	:= raid5.o raid6algos.o raid6recov.o raid6tables.o \
+dm-mirror-y	+= dm-raid1.o
+md-mod-y	+= md.o bitmap.o
+raid456-y	+= raid5.o
+raid6_pq-y	+= raid6algos.o raid6recov.o raid6tables.o \
 		   raid6int1.o raid6int2.o raid6int4.o \
 		   raid6int8.o raid6int16.o raid6int32.o \
 		   raid6altivec1.o raid6altivec2.o raid6altivec4.o \
 		   raid6altivec8.o \
 		   raid6mmx.o raid6sse1.o raid6sse2.o
-hostprogs-y	:= mktables
+hostprogs-y	+= mktables
 
 # Note: link order is important.  All raid personalities
 # and must come before md.o, as they each initialise 
@@ -26,6 +27,7 @@ obj-$(CONFIG_MD_LINEAR)		+= linear.o
 obj-$(CONFIG_MD_RAID0)		+= raid0.o
 obj-$(CONFIG_MD_RAID1)		+= raid1.o
 obj-$(CONFIG_MD_RAID10)		+= raid10.o
+obj-$(CONFIG_MD_RAID6_PQ)	+= raid6_pq.o
 obj-$(CONFIG_MD_RAID456)	+= raid456.o
 obj-$(CONFIG_MD_MULTIPATH)	+= multipath.o
 obj-$(CONFIG_MD_FAULTY)		+= faulty.o
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 7199437..f8a9f7a 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -16,6 +16,7 @@
  * wait if count gets too high, wake when it drops to half.
  */
 
+#include <linux/blkdev.h>
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
@@ -26,8 +27,8 @@
 #include <linux/file.h>
 #include <linux/mount.h>
 #include <linux/buffer_head.h>
-#include <linux/raid/md.h>
-#include <linux/raid/bitmap.h>
+#include "md.h"
+#include "bitmap.h"
 
 /* debug macros */
 
@@ -111,9 +112,10 @@ static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int creat
 	unsigned char *mappage;
 
 	if (page >= bitmap->pages) {
-		printk(KERN_ALERT
-			"%s: invalid bitmap page request: %lu (> %lu)\n",
-			bmname(bitmap), page, bitmap->pages-1);
+		/* This can happen if bitmap_start_sync goes beyond
+		 * End-of-device while looking for a whole page.
+		 * It is harmless.
+		 */
 		return -EINVAL;
 	}
 
@@ -265,7 +267,6 @@ static mdk_rdev_t *next_active_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
 	list_for_each_continue_rcu(pos, &mddev->disks) {
 		rdev = list_entry(pos, mdk_rdev_t, same_set);
 		if (rdev->raid_disk >= 0 &&
-		    test_bit(In_sync, &rdev->flags) &&
 		    !test_bit(Faulty, &rdev->flags)) {
 			/* this is a usable devices */
 			atomic_inc(&rdev->nr_pending);
@@ -297,7 +298,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
 				    + size/512 > 0)
 					/* bitmap runs in to metadata */
 					goto bad_alignment;
-				if (rdev->data_offset + mddev->size*2
+				if (rdev->data_offset + mddev->dev_sectors
 				    > rdev->sb_start + bitmap->offset)
 					/* data runs in to bitmap */
 					goto bad_alignment;
@@ -570,7 +571,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
 	else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
 		 le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
 		reason = "unrecognized superblock version";
-	else if (chunksize < PAGE_SIZE)
+	else if (chunksize < 512)
 		reason = "bitmap chunksize too small";
 	else if ((1 << ffz(~chunksize)) != chunksize)
 		reason = "bitmap chunksize not a power of 2";
@@ -1306,6 +1307,9 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
 		PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n",
 		  atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
 	}
+	if (bitmap->mddev->degraded)
+		/* Never clear bits or update events_cleared when degraded */
+		success = 0;
 
 	while (sectors) {
 		int blocks;
@@ -1345,8 +1349,8 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
 	}
 }
 
-int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
-			int degraded)
+static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
+			       int degraded)
 {
 	bitmap_counter_t *bmc;
 	int rv;
@@ -1374,6 +1378,29 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
 	return rv;
 }
 
+int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
+		      int degraded)
+{
+	/* bitmap_start_sync must always report on multiples of whole
+	 * pages, otherwise resync (which is very PAGE_SIZE based) will
+	 * get confused.
+	 * So call __bitmap_start_sync repeatedly (if needed) until
+	 * at least PAGE_SIZE>>9 blocks are covered; *blocks receives
+	 * the total covered.  Return the 'or' of the results.
+	 */
+	int rv = 0;
+	int blocks1;
+
+	*blocks = 0;
+	while (*blocks < (PAGE_SIZE>>9)) {
+		rv |= __bitmap_start_sync(bitmap, offset,
+					  &blocks1, degraded);
+		offset += blocks1;
+		*blocks += blocks1;
+	}
+	return rv;
+}
+
 void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted)
 {
 	bitmap_counter_t *bmc;
@@ -1443,6 +1470,8 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
 	wait_event(bitmap->mddev->recovery_wait,
 		   atomic_read(&bitmap->mddev->recovery_active) == 0);
 
+	bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync;
+	set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
 	sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1);
 	s = 0;
 	while (s < sector && s < bitmap->mddev->resync_max_sectors) {
diff --git a/include/linux/raid/bitmap.h b/drivers/md/bitmap.h
index e989006..e989006 100644
--- a/include/linux/raid/bitmap.h
+++ b/drivers/md/bitmap.h
diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h
index d4509be..345098b 100644
--- a/drivers/md/dm-bio-list.h
+++ b/drivers/md/dm-bio-list.h
@@ -52,6 +52,16 @@ static inline void bio_list_add(struct bio_list *bl, struct bio *bio)
 	bl->tail = bio;
 }
 
+/* Push @bio onto the head of @bl, setting the tail too if none is set. */
+static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio)
+{
+	if (!bl->tail)
+		bl->tail = bio;
+
+	bio->bi_next = bl->head;
+	bl->head = bio;
+}
+
 static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
 {
 	if (!bl2->head)
diff --git a/drivers/md/dm-bio-record.h b/drivers/md/dm-bio-record.h
index d3ec217..3a8cfa2 100644
--- a/drivers/md/dm-bio-record.h
+++ b/drivers/md/dm-bio-record.h
@@ -16,30 +16,56 @@
  * functions in this file help the target record and restore the
  * original bio state.
  */
+
+struct dm_bio_vec_details {	/* saved bv_len/bv_offset of one bio_vec */
+#if PAGE_SIZE < 65536		/* presumably: both values then fit in 16 bits */
+	__u16 bv_len;
+	__u16 bv_offset;
+#else
+	unsigned bv_len;
+	unsigned bv_offset;
+#endif
+};
+
 struct dm_bio_details {
 	sector_t bi_sector;
 	struct block_device *bi_bdev;
 	unsigned int bi_size;
 	unsigned short bi_idx;
 	unsigned long bi_flags;
+	struct dm_bio_vec_details bi_io_vec[BIO_MAX_PAGES];
 };
 
 static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
 {
+	unsigned i;
+
 	bd->bi_sector = bio->bi_sector;
 	bd->bi_bdev = bio->bi_bdev;
 	bd->bi_size = bio->bi_size;
 	bd->bi_idx = bio->bi_idx;
 	bd->bi_flags = bio->bi_flags;
+
+	for (i = 0; i < bio->bi_vcnt; i++) {
+		bd->bi_io_vec[i].bv_len = bio->bi_io_vec[i].bv_len;
+		bd->bi_io_vec[i].bv_offset = bio->bi_io_vec[i].bv_offset;
+	}
 }
 
 static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
 {
+	unsigned i;
+
 	bio->bi_sector = bd->bi_sector;
 	bio->bi_bdev = bd->bi_bdev;
 	bio->bi_size = bd->bi_size;
 	bio->bi_idx = bd->bi_idx;
 	bio->bi_flags = bd->bi_flags;
+
+	for (i = 0; i < bio->bi_vcnt; i++) {
+		bio->bi_io_vec[i].bv_len = bd->bi_io_vec[i].bv_len;
+		bio->bi_io_vec[i].bv_offset = bd->bi_io_vec[i].bv_offset;
+	}
 }
 
 #endif
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index bfefd07..53394e8 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1156,8 +1156,7 @@ bad_ivmode:
 	crypto_free_ablkcipher(tfm);
 bad_cipher:
 	/* Must zero key material before freeing */
-	memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
-	kfree(cc);
+	kzfree(cc);
 	return -EINVAL;
 }
 
@@ -1183,8 +1182,7 @@ static void crypt_dtr(struct dm_target *ti)
 	dm_put_device(ti, cc->dev);
 
 	/* Must zero key material before freeing */
-	memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
-	kfree(cc);
+	kzfree(cc);
 }
 
 static int crypt_map(struct dm_target *ti, struct bio *bio,
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index dccbfb0..a2e26c2 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -7,6 +7,7 @@
 
 #include "dm-exception-store.h"
 
+#include <linux/ctype.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/vmalloc.h>
@@ -14,6 +15,257 @@
 
 #define DM_MSG_PREFIX "snapshot exception stores"
 
+static LIST_HEAD(_exception_store_types);
+static DEFINE_SPINLOCK(_lock);
+
+static struct dm_exception_store_type *__find_exception_store_type(const char *name)
+{
+	struct dm_exception_store_type *entry;
+
+	/* Linear scan of the registry for an exact name match. */
+	list_for_each_entry(entry, &_exception_store_types, list)
+		if (strcmp(name, entry->name) == 0)
+			return entry;
+	return NULL;	/* no registered type carries this name */
+}
+
+static struct dm_exception_store_type *_get_exception_store_type(const char *name)
+{
+	struct dm_exception_store_type *found;
+
+	/* Look the name up and pin the owning module, all under _lock. */
+	spin_lock(&_lock);
+	found = __find_exception_store_type(name);
+	if (found != NULL) {
+		/* A module that cannot be pinned is reported as not found. */
+		if (!try_module_get(found->module))
+			found = NULL;
+	}
+	spin_unlock(&_lock);
+	return found;
+}
+
+/*
+ * get_type
+ * @type_name
+ *
+ * Attempt to retrieve the dm_exception_store_type by name.  If not already
+ * available, attempt to load the appropriate module.
+ *
+ * Exstore modules are named "dm-exstore-" followed by the 'type_name'.
+ * Modules may contain multiple types.
+ * This function will first try the module "dm-exstore-<type_name>",
+ * then truncate 'type_name' on the last '-' and try again.
+ *
+ * For example, if type_name was "clustered-shared", it would search
+ * 'dm-exstore-clustered-shared' then 'dm-exstore-clustered'.
+ *
+ * Only the module name is shortened between attempts; the registry is
+ * always searched for the full 'type_name'.  The 'dm-exstore-' prefix
+ * was chosen because 'dm-exception-store-<type_name>' was felt to be
+ * too long.  Modules must follow this naming convention to be
+ * autoloaded.
+ *
+ * Returns: dm_exception_store_type* with its module pinned, NULL on failure
+ */
+static struct dm_exception_store_type *get_type(const char *type_name)
+{
+	char *p, *type_name_dup;
+	struct dm_exception_store_type *type;
+
+	type = _get_exception_store_type(type_name);
+	if (type)
+		return type;
+	/* Not registered yet: truncation below works on a private copy. */
+	type_name_dup = kstrdup(type_name, GFP_KERNEL);
+	if (!type_name_dup) {
+		DMERR("No memory left to attempt load for \"%s\"", type_name);
+		return NULL;
+	}
+	/* Retry while request_module() is non-zero or the lookup still fails. */
+	while (request_module("dm-exstore-%s", type_name_dup) ||
+	       !(type = _get_exception_store_type(type_name))) {
+		p = strrchr(type_name_dup, '-');
+		if (!p)
+			break;	/* nothing left to strip: give up */
+		p[0] = '\0';
+	}
+
+	if (!type)
+		DMWARN("Module for exstore type \"%s\" not found.", type_name);
+
+	kfree(type_name_dup);
+
+	return type;
+}
+
+static void put_type(struct dm_exception_store_type *type)
+{
+	spin_lock(&_lock);
+	module_put(type->module);	/* undoes get_type()'s try_module_get() */
+	spin_unlock(&_lock);
+}
+
+int dm_exception_store_type_register(struct dm_exception_store_type *type)
+{
+	int ret = -EEXIST;	/* result when the name is already taken */
+
+	/* The duplicate check and the insertion share one lock hold. */
+	spin_lock(&_lock);
+	if (__find_exception_store_type(type->name) == NULL) {
+		list_add(&type->list, &_exception_store_types);
+		ret = 0;
+	}
+	spin_unlock(&_lock);
+	return ret;
+}
+EXPORT_SYMBOL(dm_exception_store_type_register);
+
+int dm_exception_store_type_unregister(struct dm_exception_store_type *type)
+{
+	int ret = -EINVAL;	/* result when the name is not registered */
+
+	/* The lookup and the removal share one lock hold. */
+	spin_lock(&_lock);
+	if (__find_exception_store_type(type->name) != NULL) {
+		/* Unlink the caller's own list node from the registry. */
+		list_del(&type->list);
+		ret = 0;
+	}
+	spin_unlock(&_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(dm_exception_store_type_unregister);
+
+/*
+ * Return 'n' raised to the next multiple of 'size' (unchanged if it
+ * already is one).  'size' must be a power of 2 for the mask to work.
+ */
+static ulong round_up(ulong n, ulong size)
+{
+	/* Adding size-1 and then clearing the low bits rounds upward. */
+	return (n + (size - 1)) & ~(size - 1);
+}
+
+static int set_chunk_size(struct dm_exception_store *store,
+			  const char *chunk_size_arg, char **error)
+{
+	unsigned long chunk_size_ulong;
+	char *value;	/* first character simple_strtoul() did not consume */
+
+	chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10);
+	if (*chunk_size_arg == '\0' || *value != '\0') {
+		*error = "Invalid chunk size";
+		return -EINVAL;
+	}
+
+	if (!chunk_size_ulong) {	/* 0: leave all three fields zeroed */
+		store->chunk_size = store->chunk_mask = store->chunk_shift = 0;
+		return 0;
+	}
+
+	/*
+	 * Chunk size (counted in 512-byte sectors) must be a multiple
+	 * of the page size.  Silently round up if it's not.
+	 */
+	chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9);
+
+	/* Check chunk_size is a power of 2 */
+	if (!is_power_of_2(chunk_size_ulong)) {
+		*error = "Chunk size is not a power of 2";
+		return -EINVAL;
+	}
+
+	/* Validate the chunk size against the device block size */
+	if (chunk_size_ulong % (bdev_hardsect_size(store->cow->bdev) >> 9)) {
+		*error = "Chunk size is not a multiple of device blocksize";
+		return -EINVAL;
+	}
+	/* NOTE(review): no upper bound is enforced on the value stored here. */
+	store->chunk_size = chunk_size_ulong;
+	store->chunk_mask = chunk_size_ulong - 1;
+	store->chunk_shift = ffs(chunk_size_ulong) - 1;
+
+	return 0;
+}
+
+/* Build a store from argv[0..2] = <COW device> <type> <chunk size>.
+ * Returns 0 and fills *store/*args_used, or a -errno after undoing all.
+ */
+int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
+		unsigned *args_used, struct dm_exception_store **store)
+{
+	int r = 0;
+	struct dm_exception_store_type *type;
+	struct dm_exception_store *tmp_store;
+	char persistent;
+
+	if (argc < 3) {
+		ti->error = "Insufficient exception store arguments";
+		return -EINVAL;
+	}
+	/* Check the flag before allocating: this exit has nothing to free. */
+	persistent = toupper(*argv[1]);
+	if (persistent != 'P' && persistent != 'N') {
+		ti->error = "Persistent flag is not P or N";
+		return -EINVAL;
+	}
+
+	tmp_store = kmalloc(sizeof(*tmp_store), GFP_KERNEL);
+	if (!tmp_store) {
+		ti->error = "Exception store allocation failed";
+		return -ENOMEM;
+	}
+
+	type = get_type(argv[1]);	/* holds a module ref until put_type() */
+	if (!type) {
+		ti->error = "Exception store type not recognised";
+		r = -EINVAL;
+		goto bad_type;
+	}
+
+	tmp_store->type = type;
+	tmp_store->ti = ti;
+	r = dm_get_device(ti, argv[0], 0, 0,
+			  FMODE_READ | FMODE_WRITE, &tmp_store->cow);
+	if (r) {
+		ti->error = "Cannot get COW device";
+		goto bad_cow;
+	}
+	/* The COW device is held from here on: failures must put it. */
+	r = set_chunk_size(tmp_store, argv[2], &ti->error);
+	if (r)
+		goto bad_ctr;
+	r = type->ctr(tmp_store, 0, NULL);
+	if (r) {
+		ti->error = "Exception store type constructor failed";
+		goto bad_ctr;
+	}
+
+	*args_used = 3;
+	*store = tmp_store;
+	return 0;
+
+bad_ctr:
+	dm_put_device(ti, tmp_store->cow);
+bad_cow:
+	put_type(type);
+bad_type:
+	kfree(tmp_store);
+	return r;
+}
+EXPORT_SYMBOL(dm_exception_store_create);
+
+void dm_exception_store_destroy(struct dm_exception_store *store)
+{
+	store->type->dtr(store);	/* type-specific teardown runs first */
+	dm_put_device(store->ti, store->cow);
+	put_type(store->type);		/* module ref dropped only after dtr */
+	kfree(store);
+}
+EXPORT_SYMBOL(dm_exception_store_destroy);
+
 int dm_exception_store_init(void)
 {
 	int r;
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index bb9f33d..0a2e6e7 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -37,11 +37,18 @@ struct dm_snap_exception {
  * Abstraction to handle the meta/layout of exception stores (the
  * COW device).
  */
-struct dm_exception_store {
+struct dm_exception_store;
+struct dm_exception_store_type {
+	const char *name;
+	struct module *module;
+
+	int (*ctr) (struct dm_exception_store *store,
+		    unsigned argc, char **argv);
+
 	/*
 	 * Destroys this object when you've finished with it.
 	 */
-	void (*destroy) (struct dm_exception_store *store);
+	void (*dtr) (struct dm_exception_store *store);
 
 	/*
 	 * The target shouldn't read the COW device until this is
@@ -72,8 +79,9 @@ struct dm_exception_store {
 	 */
 	void (*drop_snapshot) (struct dm_exception_store *store);
 
-	int (*status) (struct dm_exception_store *store, status_type_t status,
-		       char *result, unsigned int maxlen);
+	unsigned (*status) (struct dm_exception_store *store,
+			    status_type_t status, char *result,
+			    unsigned maxlen);
 
 	/*
 	 * Return how full the snapshot is.
@@ -82,7 +90,21 @@ struct dm_exception_store {
 			       sector_t *numerator,
 			       sector_t *denominator);
 
-	struct dm_snapshot *snap;
+	/* For internal device-mapper use only. */
+	struct list_head list;
+};
+
+struct dm_exception_store {
+	struct dm_exception_store_type *type;
+	struct dm_target *ti;
+
+	struct dm_dev *cow;
+
+	/* Size of data blocks saved - must be a power of 2 */
+	chunk_t chunk_size;
+	chunk_t chunk_mask;
+	chunk_t chunk_shift;
+
 	void *context;
 };
 
@@ -129,6 +151,28 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
 
 #  endif
 
+/*
+ * Return the number of sectors in the device.
+ */
+static inline sector_t get_dev_size(struct block_device *bdev)
+{
+	return bdev->bd_inode->i_size >> SECTOR_SHIFT;
+}
+
+static inline chunk_t sector_to_chunk(struct dm_exception_store *store,
+				      sector_t sector)
+{
+	return (sector & ~store->chunk_mask) >> store->chunk_shift;
+}
+
+int dm_exception_store_type_register(struct dm_exception_store_type *type);
+int dm_exception_store_type_unregister(struct dm_exception_store_type *type);
+
+int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
+			      unsigned *args_used,
+			      struct dm_exception_store **store);
+void dm_exception_store_destroy(struct dm_exception_store *store);
+
 int dm_exception_store_init(void);
 void dm_exception_store_exit(void);
 
@@ -141,8 +185,4 @@ void dm_persistent_snapshot_exit(void);
 int dm_transient_snapshot_init(void);
 void dm_transient_snapshot_exit(void);
 
-int dm_create_persistent(struct dm_exception_store *store);
-
-int dm_create_transient(struct dm_exception_store *store);
-
 #endif /* _LINUX_DM_EXCEPTION_STORE */
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 36e2b5e..e73aabd 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -370,16 +370,13 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 	while (1) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 
-		if (!atomic_read(&io.count) || signal_pending(current))
+		if (!atomic_read(&io.count))
 			break;
 
 		io_schedule();
 	}
 	set_current_state(TASK_RUNNING);
 
-	if (atomic_read(&io.count))
-		return -EINTR;
-
 	if (error_bits)
 		*error_bits = io.error_bits;
 
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 737961f..be233bc 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -16,40 +16,29 @@
 
 #define DM_MSG_PREFIX "dirty region log"
 
-struct dm_dirty_log_internal {
-	struct dm_dirty_log_type *type;
-
-	struct list_head list;
-	long use;
-};
-
 static LIST_HEAD(_log_types);
 static DEFINE_SPINLOCK(_lock);
 
-static struct dm_dirty_log_internal *__find_dirty_log_type(const char *name)
+static struct dm_dirty_log_type *__find_dirty_log_type(const char *name)
 {
-	struct dm_dirty_log_internal *log_type;
+	struct dm_dirty_log_type *log_type;
 
 	list_for_each_entry(log_type, &_log_types, list)
-		if (!strcmp(name, log_type->type->name))
+		if (!strcmp(name, log_type->name))
 			return log_type;
 
 	return NULL;
 }
 
-static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name)
+static struct dm_dirty_log_type *_get_dirty_log_type(const char *name)
 {
-	struct dm_dirty_log_internal *log_type;
+	struct dm_dirty_log_type *log_type;
 
 	spin_lock(&_lock);
 
 	log_type = __find_dirty_log_type(name);
-	if (log_type) {
-		if (!log_type->use && !try_module_get(log_type->type->module))
-			log_type = NULL;
-		else
-			log_type->use++;
-	}
+	if (log_type && !try_module_get(log_type->module))
+		log_type = NULL;
 
 	spin_unlock(&_lock);
 
@@ -76,14 +65,14 @@ static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name)
 static struct dm_dirty_log_type *get_type(const char *type_name)
 {
 	char *p, *type_name_dup;
-	struct dm_dirty_log_internal *log_type;
+	struct dm_dirty_log_type *log_type;
 
 	if (!type_name)
 		return NULL;
 
 	log_type = _get_dirty_log_type(type_name);
 	if (log_type)
-		return log_type->type;
+		return log_type;
 
 	type_name_dup = kstrdup(type_name, GFP_KERNEL);
 	if (!type_name_dup) {
@@ -105,56 +94,33 @@ static struct dm_dirty_log_type *get_type(const char *type_name)
 
 	kfree(type_name_dup);
 
-	return log_type ? log_type->type : NULL;
+	return log_type;
 }
 
 static void put_type(struct dm_dirty_log_type *type)
 {
-	struct dm_dirty_log_internal *log_type;
-
 	if (!type)
 		return;
 
 	spin_lock(&_lock);
-	log_type = __find_dirty_log_type(type->name);
-	if (!log_type)
+	if (!__find_dirty_log_type(type->name))
 		goto out;
 
-	if (!--log_type->use)
-		module_put(type->module);
-
-	BUG_ON(log_type->use < 0);
+	module_put(type->module);
 
 out:
 	spin_unlock(&_lock);
 }
 
-static struct dm_dirty_log_internal *_alloc_dirty_log_type(struct dm_dirty_log_type *type)
-{
-	struct dm_dirty_log_internal *log_type = kzalloc(sizeof(*log_type),
-							 GFP_KERNEL);
-
-	if (log_type)
-		log_type->type = type;
-
-	return log_type;
-}
-
 int dm_dirty_log_type_register(struct dm_dirty_log_type *type)
 {
-	struct dm_dirty_log_internal *log_type = _alloc_dirty_log_type(type);
 	int r = 0;
 
-	if (!log_type)
-		return -ENOMEM;
-
 	spin_lock(&_lock);
 	if (!__find_dirty_log_type(type->name))
-		list_add(&log_type->list, &_log_types);
-	else {
-		kfree(log_type);
+		list_add(&type->list, &_log_types);
+	else
 		r = -EEXIST;
-	}
 	spin_unlock(&_lock);
 
 	return r;
@@ -163,25 +129,16 @@ EXPORT_SYMBOL(dm_dirty_log_type_register);
 
 int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type)
 {
-	struct dm_dirty_log_internal *log_type;
-
 	spin_lock(&_lock);
 
-	log_type = __find_dirty_log_type(type->name);
-	if (!log_type) {
+	if (!__find_dirty_log_type(type->name)) {
 		spin_unlock(&_lock);
 		return -EINVAL;
 	}
 
-	if (log_type->use) {
-		spin_unlock(&_lock);
-		return -ETXTBSY;
-	}
-
-	list_del(&log_type->list);
+	list_del(&type->list);
 
 	spin_unlock(&_lock);
-	kfree(log_type);
 
 	return 0;
 }
diff --git a/drivers/md/dm-path-selector.c b/drivers/md/dm-path-selector.c
index 96ea226..42c04f0 100644
--- a/drivers/md/dm-path-selector.c
+++ b/drivers/md/dm-path-selector.c
@@ -17,9 +17,7 @@
 
 struct ps_internal {
 	struct path_selector_type pst;
-
 	struct list_head list;
-	long use;
 };
 
 #define pst_to_psi(__pst) container_of((__pst), struct ps_internal, pst)
@@ -45,12 +43,8 @@ static struct ps_internal *get_path_selector(const char *name)
 
 	down_read(&_ps_lock);
 	psi = __find_path_selector_type(name);
-	if (psi) {
-		if ((psi->use == 0) && !try_module_get(psi->pst.module))
-			psi = NULL;
-		else
-			psi->use++;
-	}
+	if (psi && !try_module_get(psi->pst.module))
+		psi = NULL;
 	up_read(&_ps_lock);
 
 	return psi;
@@ -84,11 +78,7 @@ void dm_put_path_selector(struct path_selector_type *pst)
 	if (!psi)
 		goto out;
 
-	if (--psi->use == 0)
-		module_put(psi->pst.module);
-
-	BUG_ON(psi->use < 0);
-
+	module_put(psi->pst.module);
 out:
 	up_read(&_ps_lock);
 }
@@ -136,11 +126,6 @@ int dm_unregister_path_selector(struct path_selector_type *pst)
 		return -EINVAL;
 	}
 
-	if (psi->use) {
-		up_write(&_ps_lock);
-		return -ETXTBSY;
-	}
-
 	list_del(&psi->list);
 
 	up_write(&_ps_lock);
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 4d6bc10..536ef0b 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -145,6 +145,8 @@ struct dm_raid1_read_record {
 	struct dm_bio_details details;
 };
 
+static struct kmem_cache *_dm_raid1_read_record_cache;
+
 /*
  * Every mirror should look like this one.
  */
@@ -586,6 +588,9 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
 	int state;
 	struct bio *bio;
 	struct bio_list sync, nosync, recover, *this_list = NULL;
+	struct bio_list requeue;
+	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
+	region_t region;
 
 	if (!writes->head)
 		return;
@@ -596,10 +601,18 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
 	bio_list_init(&sync);
 	bio_list_init(&nosync);
 	bio_list_init(&recover);
+	bio_list_init(&requeue);
 
 	while ((bio = bio_list_pop(writes))) {
-		state = dm_rh_get_state(ms->rh,
-					dm_rh_bio_to_region(ms->rh, bio), 1);
+		region = dm_rh_bio_to_region(ms->rh, bio);
+
+		if (log->type->is_remote_recovering &&
+		    log->type->is_remote_recovering(log, region)) {
+			bio_list_add(&requeue, bio);
+			continue;
+		}
+
+		state = dm_rh_get_state(ms->rh, region, 1);
 		switch (state) {
 		case DM_RH_CLEAN:
 		case DM_RH_DIRTY:
@@ -619,6 +632,16 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
 	}
 
 	/*
+	 * Add bios that are delayed due to remote recovery
+	 * back on to the write queue
+	 */
+	if (unlikely(requeue.head)) {
+		spin_lock_irq(&ms->lock);
+		bio_list_merge(&ms->writes, &requeue);
+		spin_unlock_irq(&ms->lock);
+	}
+
+	/*
 	 * Increment the pending counts for any regions that will
 	 * be written to (writes to recover regions are going to
 	 * be delayed).
@@ -764,9 +787,9 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
 	atomic_set(&ms->suspend, 0);
 	atomic_set(&ms->default_mirror, DEFAULT_MIRROR);
 
-	len = sizeof(struct dm_raid1_read_record);
-	ms->read_record_pool = mempool_create_kmalloc_pool(MIN_READ_RECORDS,
-							   len);
+	ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS,
+						_dm_raid1_read_record_cache);
+
 	if (!ms->read_record_pool) {
 		ti->error = "Error creating mirror read_record_pool";
 		kfree(ms);
@@ -1279,16 +1302,31 @@ static int __init dm_mirror_init(void)
 {
 	int r;
 
+	_dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0);
+	if (!_dm_raid1_read_record_cache) {
+		DMERR("Can't allocate dm_raid1_read_record cache");
+		r = -ENOMEM;
+		goto bad_cache;
+	}
+
 	r = dm_register_target(&mirror_target);
-	if (r < 0)
+	if (r < 0) {
 		DMERR("Failed to register mirror target");
+		goto bad_target;
+	}
+
+	return 0;
 
+bad_target:
+	kmem_cache_destroy(_dm_raid1_read_record_cache);
+bad_cache:
 	return r;
 }
 
 static void __exit dm_mirror_exit(void)
 {
 	dm_unregister_target(&mirror_target);
+	kmem_cache_destroy(_dm_raid1_read_record_cache);
 }
 
 /* Module hooks */
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 936b34e..e75c6dd 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -6,7 +6,6 @@
  */
 
 #include "dm-exception-store.h"
-#include "dm-snap.h"
 
 #include <linux/mm.h>
 #include <linux/pagemap.h>
@@ -89,7 +88,7 @@ struct commit_callback {
  * The top level structure for a persistent exception store.
  */
 struct pstore {
-	struct dm_snapshot *snap;	/* up pointer to my snapshot */
+	struct dm_exception_store *store;
 	int version;
 	int valid;
 	uint32_t exceptions_per_area;
@@ -141,7 +140,7 @@ static int alloc_area(struct pstore *ps)
 	int r = -ENOMEM;
 	size_t len;
 
-	len = ps->snap->chunk_size << SECTOR_SHIFT;
+	len = ps->store->chunk_size << SECTOR_SHIFT;
 
 	/*
 	 * Allocate the chunk_size block of memory that will hold
@@ -163,9 +162,12 @@ static int alloc_area(struct pstore *ps)
 
 static void free_area(struct pstore *ps)
 {
-	vfree(ps->area);
+	if (ps->area)
+		vfree(ps->area);
 	ps->area = NULL;
-	vfree(ps->zero_area);
+
+	if (ps->zero_area)
+		vfree(ps->zero_area);
 	ps->zero_area = NULL;
 }
 
@@ -189,9 +191,9 @@ static void do_metadata(struct work_struct *work)
 static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
 {
 	struct dm_io_region where = {
-		.bdev = ps->snap->cow->bdev,
-		.sector = ps->snap->chunk_size * chunk,
-		.count = ps->snap->chunk_size,
+		.bdev = ps->store->cow->bdev,
+		.sector = ps->store->chunk_size * chunk,
+		.count = ps->store->chunk_size,
 	};
 	struct dm_io_request io_req = {
 		.bi_rw = rw,
@@ -247,15 +249,15 @@ static int area_io(struct pstore *ps, int rw)
 
 static void zero_memory_area(struct pstore *ps)
 {
-	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
+	memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
 }
 
 static int zero_disk_area(struct pstore *ps, chunk_t area)
 {
 	struct dm_io_region where = {
-		.bdev = ps->snap->cow->bdev,
-		.sector = ps->snap->chunk_size * area_location(ps, area),
-		.count = ps->snap->chunk_size,
+		.bdev = ps->store->cow->bdev,
+		.sector = ps->store->chunk_size * area_location(ps, area),
+		.count = ps->store->chunk_size,
 	};
 	struct dm_io_request io_req = {
 		.bi_rw = WRITE,
@@ -278,15 +280,15 @@ static int read_header(struct pstore *ps, int *new_snapshot)
 	/*
 	 * Use default chunk size (or hardsect_size, if larger) if none supplied
 	 */
-	if (!ps->snap->chunk_size) {
-		ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
-		    bdev_hardsect_size(ps->snap->cow->bdev) >> 9);
-		ps->snap->chunk_mask = ps->snap->chunk_size - 1;
-		ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1;
+	if (!ps->store->chunk_size) {
+		ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
+		    bdev_hardsect_size(ps->store->cow->bdev) >> 9);
+		ps->store->chunk_mask = ps->store->chunk_size - 1;
+		ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1;
 		chunk_size_supplied = 0;
 	}
 
-	ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
+	ps->io_client = dm_io_client_create(sectors_to_pages(ps->store->
 							     chunk_size));
 	if (IS_ERR(ps->io_client))
 		return PTR_ERR(ps->io_client);
@@ -317,22 +319,22 @@ static int read_header(struct pstore *ps, int *new_snapshot)
 	ps->version = le32_to_cpu(dh->version);
 	chunk_size = le32_to_cpu(dh->chunk_size);
 
-	if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size)
+	if (!chunk_size_supplied || ps->store->chunk_size == chunk_size)
 		return 0;
 
 	DMWARN("chunk size %llu in device metadata overrides "
 	       "table chunk size of %llu.",
 	       (unsigned long long)chunk_size,
-	       (unsigned long long)ps->snap->chunk_size);
+	       (unsigned long long)ps->store->chunk_size);
 
 	/* We had a bogus chunk_size. Fix stuff up. */
 	free_area(ps);
 
-	ps->snap->chunk_size = chunk_size;
-	ps->snap->chunk_mask = chunk_size - 1;
-	ps->snap->chunk_shift = ffs(chunk_size) - 1;
+	ps->store->chunk_size = chunk_size;
+	ps->store->chunk_mask = chunk_size - 1;
+	ps->store->chunk_shift = ffs(chunk_size) - 1;
 
-	r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
+	r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size),
 				ps->io_client);
 	if (r)
 		return r;
@@ -349,13 +351,13 @@ static int write_header(struct pstore *ps)
 {
 	struct disk_header *dh;
 
-	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
+	memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
 
 	dh = (struct disk_header *) ps->area;
 	dh->magic = cpu_to_le32(SNAP_MAGIC);
 	dh->valid = cpu_to_le32(ps->valid);
 	dh->version = cpu_to_le32(ps->version);
-	dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
+	dh->chunk_size = cpu_to_le32(ps->store->chunk_size);
 
 	return chunk_io(ps, 0, WRITE, 1);
 }
@@ -474,18 +476,25 @@ static struct pstore *get_info(struct dm_exception_store *store)
 static void persistent_fraction_full(struct dm_exception_store *store,
 				     sector_t *numerator, sector_t *denominator)
 {
-	*numerator = get_info(store)->next_free * store->snap->chunk_size;
-	*denominator = get_dev_size(store->snap->cow->bdev);
+	*numerator = get_info(store)->next_free * store->chunk_size;
+	*denominator = get_dev_size(store->cow->bdev);
 }
 
-static void persistent_destroy(struct dm_exception_store *store)
+static void persistent_dtr(struct dm_exception_store *store)
 {
 	struct pstore *ps = get_info(store);
 
 	destroy_workqueue(ps->metadata_wq);
-	dm_io_client_destroy(ps->io_client);
-	vfree(ps->callbacks);
+
+	/* Created in read_header */
+	if (ps->io_client)
+		dm_io_client_destroy(ps->io_client);
 	free_area(ps);
+
+	/* Allocated in persistent_read_metadata */
+	if (ps->callbacks)
+		vfree(ps->callbacks);
+
 	kfree(ps);
 }
 
@@ -507,7 +516,7 @@ static int persistent_read_metadata(struct dm_exception_store *store,
 	/*
 	 * Now we know correct chunk_size, complete the initialisation.
 	 */
-	ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
+	ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
 				  sizeof(struct disk_exception);
 	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
 			sizeof(*ps->callbacks));
@@ -564,10 +573,10 @@ static int persistent_prepare_exception(struct dm_exception_store *store,
 	struct pstore *ps = get_info(store);
 	uint32_t stride;
 	chunk_t next_free;
-	sector_t size = get_dev_size(store->snap->cow->bdev);
+	sector_t size = get_dev_size(store->cow->bdev);
 
 	/* Is there enough room ? */
-	if (size < ((ps->next_free + 1) * store->snap->chunk_size))
+	if (size < ((ps->next_free + 1) * store->chunk_size))
 		return -ENOSPC;
 
 	e->new_chunk = ps->next_free;
@@ -656,16 +665,17 @@ static void persistent_drop_snapshot(struct dm_exception_store *store)
 		DMWARN("write header failed");
 }
 
-int dm_create_persistent(struct dm_exception_store *store)
+static int persistent_ctr(struct dm_exception_store *store,
+			  unsigned argc, char **argv)
 {
 	struct pstore *ps;
 
 	/* allocate the pstore */
-	ps = kmalloc(sizeof(*ps), GFP_KERNEL);
+	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
 	if (!ps)
 		return -ENOMEM;
 
-	ps->snap = store->snap;
+	ps->store = store;
 	ps->valid = 1;
 	ps->version = SNAPSHOT_DISK_VERSION;
 	ps->area = NULL;
@@ -683,22 +693,77 @@ int dm_create_persistent(struct dm_exception_store *store)
 		return -ENOMEM;
 	}
 
-	store->destroy = persistent_destroy;
-	store->read_metadata = persistent_read_metadata;
-	store->prepare_exception = persistent_prepare_exception;
-	store->commit_exception = persistent_commit_exception;
-	store->drop_snapshot = persistent_drop_snapshot;
-	store->fraction_full = persistent_fraction_full;
 	store->context = ps;
 
 	return 0;
 }
 
+static unsigned persistent_status(struct dm_exception_store *store,
+				  status_type_t status, char *result,
+				  unsigned maxlen)
+{
+	unsigned sz = 0;	/* returned as-is when nothing is emitted */
+
+	/*
+	 * Only STATUSTYPE_TABLE emits text; other status types emit nothing.
+	 */
+	if (status == STATUSTYPE_TABLE) {
+		DMEMIT(" %s P %llu", store->cow->name,
+		       (unsigned long long)store->chunk_size);
+	}
+
+	return sz;
+}
+
+static struct dm_exception_store_type _persistent_type = {
+	.name = "persistent",	/* string compared by the type lookup */
+	.module = THIS_MODULE,
+	.ctr = persistent_ctr,
+	.dtr = persistent_dtr,
+	.read_metadata = persistent_read_metadata,
+	.prepare_exception = persistent_prepare_exception,
+	.commit_exception = persistent_commit_exception,
+	.drop_snapshot = persistent_drop_snapshot,
+	.fraction_full = persistent_fraction_full,
+	.status = persistent_status,
+};
+
+static struct dm_exception_store_type _persistent_compat_type = {
+	.name = "P",	/* old-style name; same callbacks as "persistent" */
+	.module = THIS_MODULE,
+	.ctr = persistent_ctr,
+	.dtr = persistent_dtr,
+	.read_metadata = persistent_read_metadata,
+	.prepare_exception = persistent_prepare_exception,
+	.commit_exception = persistent_commit_exception,
+	.drop_snapshot = persistent_drop_snapshot,
+	.fraction_full = persistent_fraction_full,
+	.status = persistent_status,
+};
+
 int dm_persistent_snapshot_init(void)
 {
-	return 0;
+	int r;
+
+	r = dm_exception_store_type_register(&_persistent_type);
+	if (r) {
+		DMERR("Unable to register persistent exception store type");
+		return r;
+	}
+
+	r = dm_exception_store_type_register(&_persistent_compat_type);
+	if (r) {
+		DMERR("Unable to register old-style persistent exception "
+		      "store type");
+		dm_exception_store_type_unregister(&_persistent_type);
+		return r;
+	}
+
+	return r;
 }
 
 void dm_persistent_snapshot_exit(void)
 {
+	dm_exception_store_type_unregister(&_persistent_type);
+	dm_exception_store_type_unregister(&_persistent_compat_type);
 }
diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c
index 7f6e2e6..cde5aa5 100644
--- a/drivers/md/dm-snap-transient.c
+++ b/drivers/md/dm-snap-transient.c
@@ -6,7 +6,6 @@
  */
 
 #include "dm-exception-store.h"
-#include "dm-snap.h"
 
 #include <linux/mm.h>
 #include <linux/pagemap.h>
@@ -23,7 +22,7 @@ struct transient_c {
 	sector_t next_free;
 };
 
-static void transient_destroy(struct dm_exception_store *store)
+static void transient_dtr(struct dm_exception_store *store)
 {
 	kfree(store->context);
 }
@@ -39,14 +38,14 @@ static int transient_read_metadata(struct dm_exception_store *store,
 static int transient_prepare_exception(struct dm_exception_store *store,
 				       struct dm_snap_exception *e)
 {
-	struct transient_c *tc = (struct transient_c *) store->context;
-	sector_t size = get_dev_size(store->snap->cow->bdev);
+	struct transient_c *tc = store->context;
+	sector_t size = get_dev_size(store->cow->bdev);
 
-	if (size < (tc->next_free + store->snap->chunk_size))
+	if (size < (tc->next_free + store->chunk_size))
 		return -1;
 
-	e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
-	tc->next_free += store->snap->chunk_size;
+	e->new_chunk = sector_to_chunk(store, tc->next_free);
+	tc->next_free += store->chunk_size;
 
 	return 0;
 }
@@ -64,20 +63,14 @@ static void transient_fraction_full(struct dm_exception_store *store,
 				    sector_t *numerator, sector_t *denominator)
 {
 	*numerator = ((struct transient_c *) store->context)->next_free;
-	*denominator = get_dev_size(store->snap->cow->bdev);
+	*denominator = get_dev_size(store->cow->bdev);
 }
 
-int dm_create_transient(struct dm_exception_store *store)
+static int transient_ctr(struct dm_exception_store *store,
+			 unsigned argc, char **argv)
 {
 	struct transient_c *tc;
 
-	store->destroy = transient_destroy;
-	store->read_metadata = transient_read_metadata;
-	store->prepare_exception = transient_prepare_exception;
-	store->commit_exception = transient_commit_exception;
-	store->drop_snapshot = NULL;
-	store->fraction_full = transient_fraction_full;
-
 	tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
 	if (!tc)
 		return -ENOMEM;
@@ -88,11 +81,70 @@ int dm_create_transient(struct dm_exception_store *store)
 	return 0;
 }
 
+/*
+ * Emit status text for a transient store.  Only the table line carries
+ * data (" <cow name> N <chunk size>"); nothing is added for INFO.
+ */
+static unsigned transient_status(struct dm_exception_store *store,
+				 status_type_t status, char *result,
+				 unsigned maxlen)
+{
+	unsigned sz = 0;
+
+	if (status == STATUSTYPE_TABLE)
+		DMEMIT(" %s N %llu", store->cow->name,
+		       (unsigned long long)store->chunk_size);
+
+	return sz;
+}
+
+static struct dm_exception_store_type _transient_type = {
+	.name = "transient",	/* same callbacks as the "N" table below */
+	.module = THIS_MODULE,
+	.ctr = transient_ctr,
+	.dtr = transient_dtr,
+	.read_metadata = transient_read_metadata,
+	.prepare_exception = transient_prepare_exception,
+	.commit_exception = transient_commit_exception,
+	.fraction_full = transient_fraction_full,
+	.status = transient_status,
+};
+
+static struct dm_exception_store_type _transient_compat_type = {
+	.name = "N",		/* old-style name; only .name differs */
+	.module = THIS_MODULE,
+	.ctr = transient_ctr,
+	.dtr = transient_dtr,
+	.read_metadata = transient_read_metadata,
+	.prepare_exception = transient_prepare_exception,
+	.commit_exception = transient_commit_exception,
+	.fraction_full = transient_fraction_full,
+	.status = transient_status,
+};
+
 int dm_transient_snapshot_init(void)
 {
-	return 0;
+	int r = dm_exception_store_type_register(&_transient_type);
+
+	if (r) {
+		DMWARN("Unable to register transient exception store type");
+		return r;
+	}
+
+	/* Register the same operations under the old-style name too. */
+	r = dm_exception_store_type_register(&_transient_compat_type);
+	if (!r)
+		return 0;
+
+	/* Keep registration all-or-nothing: drop the first type again. */
+	DMWARN("Unable to register old-style transient "
+	       "exception store type");
+	dm_exception_store_type_unregister(&_transient_type);
+	return r;
 }
 
 void dm_transient_snapshot_exit(void)
 {
+	dm_exception_store_type_unregister(&_transient_type);
+	dm_exception_store_type_unregister(&_transient_compat_type);
 }
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 65ff82f..981a041 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -7,7 +7,6 @@
  */
 
 #include <linux/blkdev.h>
-#include <linux/ctype.h>
 #include <linux/device-mapper.h>
 #include <linux/delay.h>
 #include <linux/fs.h>
@@ -20,9 +19,9 @@
 #include <linux/vmalloc.h>
 #include <linux/log2.h>
 #include <linux/dm-kcopyd.h>
+#include <linux/workqueue.h>
 
 #include "dm-exception-store.h"
-#include "dm-snap.h"
 #include "dm-bio-list.h"
 
 #define DM_MSG_PREFIX "snapshots"
@@ -47,9 +46,76 @@
  */
 #define MIN_IOS 256
 
+#define DM_TRACKED_CHUNK_HASH_SIZE	16
+#define DM_TRACKED_CHUNK_HASH(x)	((unsigned long)(x) & \
+					 (DM_TRACKED_CHUNK_HASH_SIZE - 1))
+
+struct exception_table {
+	uint32_t hash_mask;
+	unsigned hash_shift;
+	struct list_head *table;
+};
+
+struct dm_snapshot {
+	struct rw_semaphore lock;
+
+	struct dm_dev *origin;
+
+	/* Link in the origin's list of snapshots (o->snapshots) */
+	struct list_head list;
+
+	/* 0 = unusable (e.g. full); cleared by __invalidate_snapshot() */
+	int valid;
+
+	/* Origin writes don't trigger exceptions until this is set */
+	int active;
+
+	mempool_t *pending_pool;	/* mempool for pending exceptions */
+
+	atomic_t pending_exceptions_count;
+
+	struct exception_table pending;
+	struct exception_table complete;
+
+	/*
+	 * pe_lock protects all pending_exception operations and access
+	 * as well as the snapshot_bios list.
+	 */
+	spinlock_t pe_lock;
+
+	/* Exception store; also holds the COW device and chunk geometry */
+	struct dm_exception_store *store;
+
+	struct dm_kcopyd_client *kcopyd_client;
+
+	/* Queue of snapshot writes for ksnapd to flush */
+	struct bio_list queued_bios;
+	struct work_struct queued_bios_work;
+
+	/* Chunks with outstanding reads */
+	mempool_t *tracked_chunk_pool;
+	spinlock_t tracked_chunk_lock;
+	struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
+};
+
 static struct workqueue_struct *ksnapd;
 static void flush_queued_bios(struct work_struct *work);
 
+static sector_t chunk_to_sector(struct dm_exception_store *store,
+				chunk_t chunk)
+{
+	return chunk << store->chunk_shift;	/* first sector of the chunk */
+}
+
+static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
+{
+	/*
+	 * There is only ever one instance of a particular block
+	 * device so we can compare pointers safely.
+	 */
+	return lhs == rhs;
+}
+
 struct dm_snap_pending_exception {
 	struct dm_snap_exception e;
 
@@ -476,11 +542,11 @@ static int init_hash_tables(struct dm_snapshot *s)
 	 * Calculate based on the size of the original volume or
 	 * the COW volume...
 	 */
-	cow_dev_size = get_dev_size(s->cow->bdev);
+	cow_dev_size = get_dev_size(s->store->cow->bdev);
 	origin_dev_size = get_dev_size(s->origin->bdev);
 	max_buckets = calc_max_buckets();
 
-	hash_size = min(origin_dev_size, cow_dev_size) >> s->chunk_shift;
+	hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift;
 	hash_size = min(hash_size, max_buckets);
 
 	hash_size = rounddown_pow_of_two(hash_size);
@@ -505,58 +571,6 @@ static int init_hash_tables(struct dm_snapshot *s)
 }
 
 /*
- * Round a number up to the nearest 'size' boundary.  size must
- * be a power of 2.
- */
-static ulong round_up(ulong n, ulong size)
-{
-	size--;
-	return (n + size) & ~size;
-}
-
-static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg,
-			  char **error)
-{
-	unsigned long chunk_size;
-	char *value;
-
-	chunk_size = simple_strtoul(chunk_size_arg, &value, 10);
-	if (*chunk_size_arg == '\0' || *value != '\0') {
-		*error = "Invalid chunk size";
-		return -EINVAL;
-	}
-
-	if (!chunk_size) {
-		s->chunk_size = s->chunk_mask = s->chunk_shift = 0;
-		return 0;
-	}
-
-	/*
-	 * Chunk size must be multiple of page size.  Silently
-	 * round up if it's not.
-	 */
-	chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);
-
-	/* Check chunk_size is a power of 2 */
-	if (!is_power_of_2(chunk_size)) {
-		*error = "Chunk size is not a power of 2";
-		return -EINVAL;
-	}
-
-	/* Validate the chunk size against the device block size */
-	if (chunk_size % (bdev_hardsect_size(s->cow->bdev) >> 9)) {
-		*error = "Chunk size is not a multiple of device blocksize";
-		return -EINVAL;
-	}
-
-	s->chunk_size = chunk_size;
-	s->chunk_mask = chunk_size - 1;
-	s->chunk_shift = ffs(chunk_size) - 1;
-
-	return 0;
-}
-
-/*
  * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
  */
 static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
@@ -564,91 +578,68 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	struct dm_snapshot *s;
 	int i;
 	int r = -EINVAL;
-	char persistent;
 	char *origin_path;
-	char *cow_path;
+	struct dm_exception_store *store;
+	unsigned args_used;
 
 	if (argc != 4) {
 		ti->error = "requires exactly 4 arguments";
 		r = -EINVAL;
-		goto bad1;
+		goto bad_args;
 	}
 
 	origin_path = argv[0];
-	cow_path = argv[1];
-	persistent = toupper(*argv[2]);
+	argv++;
+	argc--;
 
-	if (persistent != 'P' && persistent != 'N') {
-		ti->error = "Persistent flag is not P or N";
+	r = dm_exception_store_create(ti, argc, argv, &args_used, &store);
+	if (r) {
+		ti->error = "Couldn't create exception store";
 		r = -EINVAL;
-		goto bad1;
+		goto bad_args;
 	}
 
+	argv += args_used;
+	argc -= args_used;
+
 	s = kmalloc(sizeof(*s), GFP_KERNEL);
-	if (s == NULL) {
+	if (!s) {
 		ti->error = "Cannot allocate snapshot context private "
 		    "structure";
 		r = -ENOMEM;
-		goto bad1;
+		goto bad_snap;
 	}
 
 	r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
 	if (r) {
 		ti->error = "Cannot get origin device";
-		goto bad2;
-	}
-
-	r = dm_get_device(ti, cow_path, 0, 0,
-			  FMODE_READ | FMODE_WRITE, &s->cow);
-	if (r) {
-		dm_put_device(ti, s->origin);
-		ti->error = "Cannot get COW device";
-		goto bad2;
+		goto bad_origin;
 	}
 
-	r = set_chunk_size(s, argv[3], &ti->error);
-	if (r)
-		goto bad3;
-
-	s->type = persistent;
-
+	s->store = store;
 	s->valid = 1;
 	s->active = 0;
 	atomic_set(&s->pending_exceptions_count, 0);
 	init_rwsem(&s->lock);
 	spin_lock_init(&s->pe_lock);
-	s->ti = ti;
 
 	/* Allocate hash table for COW data */
 	if (init_hash_tables(s)) {
 		ti->error = "Unable to allocate hash table space";
 		r = -ENOMEM;
-		goto bad3;
-	}
-
-	s->store.snap = s;
-
-	if (persistent == 'P')
-		r = dm_create_persistent(&s->store);
-	else
-		r = dm_create_transient(&s->store);
-
-	if (r) {
-		ti->error = "Couldn't create exception store";
-		r = -EINVAL;
-		goto bad4;
+		goto bad_hash_tables;
 	}
 
 	r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
 	if (r) {
 		ti->error = "Could not create kcopyd client";
-		goto bad5;
+		goto bad_kcopyd;
 	}
 
 	s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache);
 	if (!s->pending_pool) {
 		ti->error = "Could not allocate mempool for pending exceptions";
-		goto bad6;
+		goto bad_pending_pool;
 	}
 
 	s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS,
@@ -665,7 +656,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	spin_lock_init(&s->tracked_chunk_lock);
 
 	/* Metadata must only be loaded into one table at once */
-	r = s->store.read_metadata(&s->store, dm_add_exception, (void *)s);
+	r = s->store->type->read_metadata(s->store, dm_add_exception,
+					  (void *)s);
 	if (r < 0) {
 		ti->error = "Failed to read snapshot metadata";
 		goto bad_load_and_register;
@@ -686,34 +678,33 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 
 	ti->private = s;
-	ti->split_io = s->chunk_size;
+	ti->split_io = s->store->chunk_size;
 
 	return 0;
 
- bad_load_and_register:
+bad_load_and_register:
 	mempool_destroy(s->tracked_chunk_pool);
 
- bad_tracked_chunk_pool:
+bad_tracked_chunk_pool:
 	mempool_destroy(s->pending_pool);
 
- bad6:
+bad_pending_pool:
 	dm_kcopyd_client_destroy(s->kcopyd_client);
 
- bad5:
-	s->store.destroy(&s->store);
-
- bad4:
+bad_kcopyd:
 	exit_exception_table(&s->pending, pending_cache);
 	exit_exception_table(&s->complete, exception_cache);
 
- bad3:
-	dm_put_device(ti, s->cow);
+bad_hash_tables:
 	dm_put_device(ti, s->origin);
 
- bad2:
+bad_origin:
 	kfree(s);
 
- bad1:
+bad_snap:
+	dm_exception_store_destroy(store);
+
+bad_args:
 	return r;
 }
 
@@ -724,8 +715,6 @@ static void __free_exceptions(struct dm_snapshot *s)
 
 	exit_exception_table(&s->pending, pending_cache);
 	exit_exception_table(&s->complete, exception_cache);
-
-	s->store.destroy(&s->store);
 }
 
 static void snapshot_dtr(struct dm_target *ti)
@@ -761,7 +750,8 @@ static void snapshot_dtr(struct dm_target *ti)
 	mempool_destroy(s->pending_pool);
 
 	dm_put_device(ti, s->origin);
-	dm_put_device(ti, s->cow);
+
+	dm_exception_store_destroy(s->store);
 
 	kfree(s);
 }
@@ -820,12 +810,12 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err)
 	else if (err == -ENOMEM)
 		DMERR("Invalidating snapshot: Unable to allocate exception.");
 
-	if (s->store.drop_snapshot)
-		s->store.drop_snapshot(&s->store);
+	if (s->store->type->drop_snapshot)
+		s->store->type->drop_snapshot(s->store);
 
 	s->valid = 0;
 
-	dm_table_event(s->ti->table);
+	dm_table_event(s->store->ti->table);
 }
 
 static void get_pending_exception(struct dm_snap_pending_exception *pe)
@@ -943,8 +933,8 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
 
 	else
 		/* Update the metadata if we are persistent */
-		s->store.commit_exception(&s->store, &pe->e, commit_callback,
-					  pe);
+		s->store->type->commit_exception(s->store, &pe->e,
+						 commit_callback, pe);
 }
 
 /*
@@ -960,11 +950,11 @@ static void start_copy(struct dm_snap_pending_exception *pe)
 	dev_size = get_dev_size(bdev);
 
 	src.bdev = bdev;
-	src.sector = chunk_to_sector(s, pe->e.old_chunk);
-	src.count = min(s->chunk_size, dev_size - src.sector);
+	src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
+	src.count = min(s->store->chunk_size, dev_size - src.sector);
 
-	dest.bdev = s->cow->bdev;
-	dest.sector = chunk_to_sector(s, pe->e.new_chunk);
+	dest.bdev = s->store->cow->bdev;
+	dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
 	dest.count = src.count;
 
 	/* Hand over to kcopyd */
@@ -972,6 +962,17 @@ static void start_copy(struct dm_snap_pending_exception *pe)
 		    &src, 1, &dest, 0, copy_callback, pe);
 }
 
+static struct dm_snap_pending_exception *
+__lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk)
+{
+	struct dm_snap_exception *hit = lookup_exception(&s->pending, chunk);
+
+	/* Entries in s->pending are the 'e' member of a pending exception. */
+	if (hit)
+		return container_of(hit, struct dm_snap_pending_exception, e);
+	return NULL;
+}
+
 /*
  * Looks to see if this snapshot already has a pending exception
  * for this chunk, otherwise it allocates a new one and inserts
@@ -981,40 +982,15 @@ static void start_copy(struct dm_snap_pending_exception *pe)
  * this.
  */
 static struct dm_snap_pending_exception *
-__find_pending_exception(struct dm_snapshot *s, struct bio *bio)
+__find_pending_exception(struct dm_snapshot *s,
+			 struct dm_snap_pending_exception *pe, chunk_t chunk)
 {
-	struct dm_snap_exception *e;
-	struct dm_snap_pending_exception *pe;
-	chunk_t chunk = sector_to_chunk(s, bio->bi_sector);
+	struct dm_snap_pending_exception *pe2;
 
-	/*
-	 * Is there a pending exception for this already ?
-	 */
-	e = lookup_exception(&s->pending, chunk);
-	if (e) {
-		/* cast the exception to a pending exception */
-		pe = container_of(e, struct dm_snap_pending_exception, e);
-		goto out;
-	}
-
-	/*
-	 * Create a new pending exception, we don't want
-	 * to hold the lock while we do this.
-	 */
-	up_write(&s->lock);
-	pe = alloc_pending_exception(s);
-	down_write(&s->lock);
-
-	if (!s->valid) {
-		free_pending_exception(pe);
-		return NULL;
-	}
-
-	e = lookup_exception(&s->pending, chunk);
-	if (e) {
+	pe2 = __lookup_pending_exception(s, chunk);
+	if (pe2) {
 		free_pending_exception(pe);
-		pe = container_of(e, struct dm_snap_pending_exception, e);
-		goto out;
+		return pe2;
 	}
 
 	pe->e.old_chunk = chunk;
@@ -1024,7 +1000,7 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio)
 	atomic_set(&pe->ref_count, 0);
 	pe->started = 0;
 
-	if (s->store.prepare_exception(&s->store, &pe->e)) {
+	if (s->store->type->prepare_exception(s->store, &pe->e)) {
 		free_pending_exception(pe);
 		return NULL;
 	}
@@ -1032,17 +1008,18 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio)
 	get_pending_exception(pe);
 	insert_exception(&s->pending, &pe->e);
 
- out:
 	return pe;
 }
 
 static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e,
 			    struct bio *bio, chunk_t chunk)
 {
-	bio->bi_bdev = s->cow->bdev;
-	bio->bi_sector = chunk_to_sector(s, dm_chunk_number(e->new_chunk) +
-			 (chunk - e->old_chunk)) +
-			 (bio->bi_sector & s->chunk_mask);
+	bio->bi_bdev = s->store->cow->bdev;
+	bio->bi_sector = chunk_to_sector(s->store,
+					 dm_chunk_number(e->new_chunk) +
+					 (chunk - e->old_chunk)) +
+					 (bio->bi_sector &
+					  s->store->chunk_mask);
 }
 
 static int snapshot_map(struct dm_target *ti, struct bio *bio,
@@ -1054,7 +1031,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
 	chunk_t chunk;
 	struct dm_snap_pending_exception *pe = NULL;
 
-	chunk = sector_to_chunk(s, bio->bi_sector);
+	chunk = sector_to_chunk(s->store, bio->bi_sector);
 
 	/* Full snapshots are not usable */
 	/* To get here the table must be live so s->active is always set. */
@@ -1083,11 +1060,31 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
 	 * writeable.
 	 */
 	if (bio_rw(bio) == WRITE) {
-		pe = __find_pending_exception(s, bio);
+		pe = __lookup_pending_exception(s, chunk);
 		if (!pe) {
-			__invalidate_snapshot(s, -ENOMEM);
-			r = -EIO;
-			goto out_unlock;
+			up_write(&s->lock);
+			pe = alloc_pending_exception(s);
+			down_write(&s->lock);
+
+			if (!s->valid) {
+				free_pending_exception(pe);
+				r = -EIO;
+				goto out_unlock;
+			}
+
+			e = lookup_exception(&s->complete, chunk);
+			if (e) {
+				free_pending_exception(pe);
+				remap_exception(s, e, bio, chunk);
+				goto out_unlock;
+			}
+
+			pe = __find_pending_exception(s, pe, chunk);
+			if (!pe) {
+				__invalidate_snapshot(s, -ENOMEM);
+				r = -EIO;
+				goto out_unlock;
+			}
 		}
 
 		remap_exception(s, &pe->e, bio, chunk);
@@ -1137,24 +1134,25 @@ static void snapshot_resume(struct dm_target *ti)
 static int snapshot_status(struct dm_target *ti, status_type_t type,
 			   char *result, unsigned int maxlen)
 {
+	unsigned sz = 0;
 	struct dm_snapshot *snap = ti->private;
 
 	switch (type) {
 	case STATUSTYPE_INFO:
 		if (!snap->valid)
-			snprintf(result, maxlen, "Invalid");
+			DMEMIT("Invalid");
 		else {
-			if (snap->store.fraction_full) {
+			if (snap->store->type->fraction_full) {
 				sector_t numerator, denominator;
-				snap->store.fraction_full(&snap->store,
-							  &numerator,
-							  &denominator);
-				snprintf(result, maxlen, "%llu/%llu",
-					(unsigned long long)numerator,
-					(unsigned long long)denominator);
+				snap->store->type->fraction_full(snap->store,
+								 &numerator,
+								 &denominator);
+				DMEMIT("%llu/%llu",
+				       (unsigned long long)numerator,
+				       (unsigned long long)denominator);
 			}
 			else
-				snprintf(result, maxlen, "Unknown");
+				DMEMIT("Unknown");
 		}
 		break;
 
@@ -1164,10 +1162,9 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
 		 * to make private copies if the output is to
 		 * make sense.
 		 */
-		snprintf(result, maxlen, "%s %s %c %llu",
-			 snap->origin->name, snap->cow->name,
-			 snap->type,
-			 (unsigned long long)snap->chunk_size);
+		DMEMIT("%s", snap->origin->name);
+		snap->store->type->status(snap->store, type, result + sz,
+					  maxlen - sz);
 		break;
 	}
 
@@ -1196,14 +1193,14 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
 			goto next_snapshot;
 
 		/* Nothing to do if writing beyond end of snapshot */
-		if (bio->bi_sector >= dm_table_get_size(snap->ti->table))
+		if (bio->bi_sector >= dm_table_get_size(snap->store->ti->table))
 			goto next_snapshot;
 
 		/*
 		 * Remember, different snapshots can have
 		 * different chunk sizes.
 		 */
-		chunk = sector_to_chunk(snap, bio->bi_sector);
+		chunk = sector_to_chunk(snap->store, bio->bi_sector);
 
 		/*
 		 * Check exception table to see if block
@@ -1217,10 +1214,28 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
 		if (e)
 			goto next_snapshot;
 
-		pe = __find_pending_exception(snap, bio);
+		pe = __lookup_pending_exception(snap, chunk);
 		if (!pe) {
-			__invalidate_snapshot(snap, -ENOMEM);
-			goto next_snapshot;
+			up_write(&snap->lock);
+			pe = alloc_pending_exception(snap);
+			down_write(&snap->lock);
+
+			if (!snap->valid) {
+				free_pending_exception(pe);
+				goto next_snapshot;
+			}
+
+			e = lookup_exception(&snap->complete, chunk);
+			if (e) {
+				free_pending_exception(pe);
+				goto next_snapshot;
+			}
+
+			pe = __find_pending_exception(snap, pe, chunk);
+			if (!pe) {
+				__invalidate_snapshot(snap, -ENOMEM);
+				goto next_snapshot;
+			}
 		}
 
 		if (!primary_pe) {
@@ -1360,7 +1375,8 @@ static void origin_resume(struct dm_target *ti)
 	o = __lookup_origin(dev->bdev);
 	if (o)
 		list_for_each_entry (snap, &o->snapshots, list)
-			chunk_size = min_not_zero(chunk_size, snap->chunk_size);
+			chunk_size = min_not_zero(chunk_size,
+						  snap->store->chunk_size);
 	up_read(&_origins_lock);
 
 	ti->split_io = chunk_size;
diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h
deleted file mode 100644
index d9e62b4..0000000
--- a/drivers/md/dm-snap.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
- *
- * This file is released under the GPL.
- */
-
-#ifndef DM_SNAPSHOT_H
-#define DM_SNAPSHOT_H
-
-#include <linux/device-mapper.h>
-#include "dm-exception-store.h"
-#include "dm-bio-list.h"
-#include <linux/blkdev.h>
-#include <linux/workqueue.h>
-
-struct exception_table {
-	uint32_t hash_mask;
-	unsigned hash_shift;
-	struct list_head *table;
-};
-
-#define DM_TRACKED_CHUNK_HASH_SIZE	16
-#define DM_TRACKED_CHUNK_HASH(x)	((unsigned long)(x) & \
-					 (DM_TRACKED_CHUNK_HASH_SIZE - 1))
-
-struct dm_snapshot {
-	struct rw_semaphore lock;
-	struct dm_target *ti;
-
-	struct dm_dev *origin;
-	struct dm_dev *cow;
-
-	/* List of snapshots per Origin */
-	struct list_head list;
-
-	/* Size of data blocks saved - must be a power of 2 */
-	chunk_t chunk_size;
-	chunk_t chunk_mask;
-	chunk_t chunk_shift;
-
-	/* You can't use a snapshot if this is 0 (e.g. if full) */
-	int valid;
-
-	/* Origin writes don't trigger exceptions until this is set */
-	int active;
-
-	/* Used for display of table */
-	char type;
-
-	mempool_t *pending_pool;
-
-	atomic_t pending_exceptions_count;
-
-	struct exception_table pending;
-	struct exception_table complete;
-
-	/*
-	 * pe_lock protects all pending_exception operations and access
-	 * as well as the snapshot_bios list.
-	 */
-	spinlock_t pe_lock;
-
-	/* The on disk metadata handler */
-	struct dm_exception_store store;
-
-	struct dm_kcopyd_client *kcopyd_client;
-
-	/* Queue of snapshot writes for ksnapd to flush */
-	struct bio_list queued_bios;
-	struct work_struct queued_bios_work;
-
-	/* Chunks with outstanding reads */
-	mempool_t *tracked_chunk_pool;
-	spinlock_t tracked_chunk_lock;
-	struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
-};
-
-/*
- * Return the number of sectors in the device.
- */
-static inline sector_t get_dev_size(struct block_device *bdev)
-{
-	return bdev->bd_inode->i_size >> SECTOR_SHIFT;
-}
-
-static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector)
-{
-	return (sector & ~s->chunk_mask) >> s->chunk_shift;
-}
-
-static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk)
-{
-	return chunk << s->chunk_shift;
-}
-
-static inline int bdev_equal(struct block_device *lhs, struct block_device *rhs)
-{
-	/*
-	 * There is only ever one instance of a particular block
-	 * device so we can compare pointers safely.
-	 */
-	return lhs == rhs;
-}
-
-#endif
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 2fd66c3..e8361b1 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -399,28 +399,30 @@ static int check_device_area(struct dm_dev_internal *dd, sector_t start,
 }
 
 /*
- * This upgrades the mode on an already open dm_dev.  Being
+ * This upgrades the mode on an already open dm_dev, being
  * careful to leave things as they were if we fail to reopen the
- * device.
+ * device and not to touch the existing bdev field in case
+ * it is accessed concurrently inside dm_table_any_congested().
  */
 static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
 			struct mapped_device *md)
 {
 	int r;
-	struct dm_dev_internal dd_copy;
-	dev_t dev = dd->dm_dev.bdev->bd_dev;
+	struct dm_dev_internal dd_new, dd_old;
 
-	dd_copy = *dd;
+	dd_new = dd_old = *dd;
+
+	dd_new.dm_dev.mode |= new_mode;
+	dd_new.dm_dev.bdev = NULL;
+
+	r = open_dev(&dd_new, dd->dm_dev.bdev->bd_dev, md);
+	if (r)
+		return r;
 
 	dd->dm_dev.mode |= new_mode;
-	dd->dm_dev.bdev = NULL;
-	r = open_dev(dd, dev, md);
-	if (!r)
-		close_dev(&dd_copy, md);
-	else
-		*dd = dd_copy;
+	close_dev(&dd_old, md);
 
-	return r;
+	return 0;
 }
 
 /*
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 7decf10..04feccf 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -14,45 +14,34 @@
 
 #define DM_MSG_PREFIX "target"
 
-struct tt_internal {
-	struct target_type tt;
-
-	struct list_head list;
-	long use;
-};
-
 static LIST_HEAD(_targets);
 static DECLARE_RWSEM(_lock);
 
 #define DM_MOD_NAME_SIZE 32
 
-static inline struct tt_internal *__find_target_type(const char *name)
+static inline struct target_type *__find_target_type(const char *name)
 {
-	struct tt_internal *ti;
+	struct target_type *tt;
 
-	list_for_each_entry (ti, &_targets, list)
-		if (!strcmp(name, ti->tt.name))
-			return ti;
+	list_for_each_entry(tt, &_targets, list)
+		if (!strcmp(name, tt->name))
+			return tt;
 
 	return NULL;
 }
 
-static struct tt_internal *get_target_type(const char *name)
+static struct target_type *get_target_type(const char *name)
 {
-	struct tt_internal *ti;
+	struct target_type *tt;
 
 	down_read(&_lock);
 
-	ti = __find_target_type(name);
-	if (ti) {
-		if ((ti->use == 0) && !try_module_get(ti->tt.module))
-			ti = NULL;
-		else
-			ti->use++;
-	}
+	tt = __find_target_type(name);
+	if (tt && !try_module_get(tt->module))
+		tt = NULL;
 
 	up_read(&_lock);
-	return ti;
+	return tt;
 }
 
 static void load_module(const char *name)
@@ -62,92 +51,59 @@ static void load_module(const char *name)
 
 struct target_type *dm_get_target_type(const char *name)
 {
-	struct tt_internal *ti = get_target_type(name);
+	struct target_type *tt = get_target_type(name);
 
-	if (!ti) {
+	if (!tt) {
 		load_module(name);
-		ti = get_target_type(name);
+		tt = get_target_type(name);
 	}
 
-	return ti ? &ti->tt : NULL;
+	return tt;
 }
 
-void dm_put_target_type(struct target_type *t)
+void dm_put_target_type(struct target_type *tt)
 {
-	struct tt_internal *ti = (struct tt_internal *) t;
-
 	down_read(&_lock);
-	if (--ti->use == 0)
-		module_put(ti->tt.module);
-
-	BUG_ON(ti->use < 0);
+	module_put(tt->module);
 	up_read(&_lock);
-
-	return;
-}
-
-static struct tt_internal *alloc_target(struct target_type *t)
-{
-	struct tt_internal *ti = kzalloc(sizeof(*ti), GFP_KERNEL);
-
-	if (ti)
-		ti->tt = *t;
-
-	return ti;
 }
 
-
 int dm_target_iterate(void (*iter_func)(struct target_type *tt,
 					void *param), void *param)
 {
-	struct tt_internal *ti;
+	struct target_type *tt;
 
 	down_read(&_lock);
-	list_for_each_entry (ti, &_targets, list)
-		iter_func(&ti->tt, param);
+	list_for_each_entry(tt, &_targets, list)
+		iter_func(tt, param);
 	up_read(&_lock);
 
 	return 0;
 }
 
-int dm_register_target(struct target_type *t)
+int dm_register_target(struct target_type *tt)
 {
 	int rv = 0;
-	struct tt_internal *ti = alloc_target(t);
-
-	if (!ti)
-		return -ENOMEM;
 
 	down_write(&_lock);
-	if (__find_target_type(t->name))
+	if (__find_target_type(tt->name))
 		rv = -EEXIST;
 	else
-		list_add(&ti->list, &_targets);
+		list_add(&tt->list, &_targets);
 
 	up_write(&_lock);
-	if (rv)
-		kfree(ti);
 	return rv;
 }
 
-void dm_unregister_target(struct target_type *t)
+void dm_unregister_target(struct target_type *tt)
 {
-	struct tt_internal *ti;
-
 	down_write(&_lock);
-	if (!(ti = __find_target_type(t->name))) {
-		DMCRIT("Unregistering unrecognised target: %s", t->name);
-		BUG();
-	}
-
-	if (ti->use) {
-		DMCRIT("Attempt to unregister target still in use: %s",
-		       t->name);
+	if (!__find_target_type(tt->name)) {
+		DMCRIT("Unregistering unrecognised target: %s", tt->name);
 		BUG();
 	}
 
-	list_del(&ti->list);
-	kfree(ti);
+	list_del(&tt->list);
 
 	up_write(&_lock);
 }
@@ -156,17 +112,17 @@ void dm_unregister_target(struct target_type *t)
  * io-err: always fails an io, useful for bringing
  * up LVs that have holes in them.
  */
-static int io_err_ctr(struct dm_target *ti, unsigned int argc, char **args)
+static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args)
 {
 	return 0;
 }
 
-static void io_err_dtr(struct dm_target *ti)
+static void io_err_dtr(struct dm_target *tt)
 {
 	/* empty */
 }
 
-static int io_err_map(struct dm_target *ti, struct bio *bio,
+static int io_err_map(struct dm_target *tt, struct bio *bio,
 		      union map_info *map_context)
 {
 	return -EIO;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8d40f27..788ba96 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -99,19 +99,9 @@ union map_info *dm_get_mapinfo(struct bio *bio)
 /*
  * Work processed by per-device workqueue.
  */
-struct dm_wq_req {
-	enum {
-		DM_WQ_FLUSH_DEFERRED,
-	} type;
-	struct work_struct work;
-	struct mapped_device *md;
-	void *context;
-};
-
 struct mapped_device {
 	struct rw_semaphore io_lock;
 	struct mutex suspend_lock;
-	spinlock_t pushback_lock;
 	rwlock_t map_lock;
 	atomic_t holders;
 	atomic_t open_count;
@@ -129,8 +119,9 @@ struct mapped_device {
 	 */
 	atomic_t pending;
 	wait_queue_head_t wait;
+	struct work_struct work;
 	struct bio_list deferred;
-	struct bio_list pushback;
+	spinlock_t deferred_lock;
 
 	/*
 	 * Processing queue (flush/barriers)
@@ -453,7 +444,9 @@ static int queue_io(struct mapped_device *md, struct bio *bio)
 		return 1;
 	}
 
+	spin_lock_irq(&md->deferred_lock);
 	bio_list_add(&md->deferred, bio);
+	spin_unlock_irq(&md->deferred_lock);
 
 	up_write(&md->io_lock);
 	return 0;		/* deferred successfully */
@@ -537,16 +530,14 @@ static void dec_pending(struct dm_io *io, int error)
 		if (io->error == DM_ENDIO_REQUEUE) {
 			/*
 			 * Target requested pushing back the I/O.
-			 * This must be handled before the sleeper on
-			 * suspend queue merges the pushback list.
 			 */
-			spin_lock_irqsave(&md->pushback_lock, flags);
+			spin_lock_irqsave(&md->deferred_lock, flags);
 			if (__noflush_suspending(md))
-				bio_list_add(&md->pushback, io->bio);
+				bio_list_add(&md->deferred, io->bio);
 			else
 				/* noflush suspend was interrupted. */
 				io->error = -EIO;
-			spin_unlock_irqrestore(&md->pushback_lock, flags);
+			spin_unlock_irqrestore(&md->deferred_lock, flags);
 		}
 
 		end_io_acct(io);
@@ -834,20 +825,22 @@ static int __clone_and_map(struct clone_info *ci)
 }
 
 /*
- * Split the bio into several clones.
+ * Split the bio into several clones and submit it to targets.
  */
-static int __split_bio(struct mapped_device *md, struct bio *bio)
+static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 {
 	struct clone_info ci;
 	int error = 0;
 
 	ci.map = dm_get_table(md);
-	if (unlikely(!ci.map))
-		return -EIO;
+	if (unlikely(!ci.map)) {
+		bio_io_error(bio);
+		return;
+	}
 	if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) {
 		dm_table_put(ci.map);
 		bio_endio(bio, -EOPNOTSUPP);
-		return 0;
+		return;
 	}
 	ci.md = md;
 	ci.bio = bio;
@@ -867,8 +860,6 @@ static int __split_bio(struct mapped_device *md, struct bio *bio)
 	/* drop the extra reference count */
 	dec_pending(ci.io, error);
 	dm_table_put(ci.map);
-
-	return 0;
 }
 /*-----------------------------------------------------------------
  * CRUD END
@@ -959,8 +950,9 @@ static int dm_request(struct request_queue *q, struct bio *bio)
 		down_read(&md->io_lock);
 	}
 
-	r = __split_bio(md, bio);
+	__split_and_process_bio(md, bio);
 	up_read(&md->io_lock);
+	return 0;
 
 out_req:
 	if (r < 0)
@@ -1074,6 +1066,8 @@ out:
 
 static struct block_device_operations dm_blk_dops;
 
+static void dm_wq_work(struct work_struct *work);
+
 /*
  * Allocate and initialise a blank device with a given minor.
  */
@@ -1101,7 +1095,7 @@ static struct mapped_device *alloc_dev(int minor)
 
 	init_rwsem(&md->io_lock);
 	mutex_init(&md->suspend_lock);
-	spin_lock_init(&md->pushback_lock);
+	spin_lock_init(&md->deferred_lock);
 	rwlock_init(&md->map_lock);
 	atomic_set(&md->holders, 1);
 	atomic_set(&md->open_count, 0);
@@ -1118,6 +1112,7 @@ static struct mapped_device *alloc_dev(int minor)
 	md->queue->backing_dev_info.congested_fn = dm_any_congested;
 	md->queue->backing_dev_info.congested_data = md;
 	blk_queue_make_request(md->queue, dm_request);
+	blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL);
 	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
 	md->queue->unplug_fn = dm_unplug_all;
 	blk_queue_merge_bvec(md->queue, dm_merge_bvec);
@@ -1140,6 +1135,7 @@ static struct mapped_device *alloc_dev(int minor)
 
 	atomic_set(&md->pending, 0);
 	init_waitqueue_head(&md->wait);
+	INIT_WORK(&md->work, dm_wq_work);
 	init_waitqueue_head(&md->eventq);
 
 	md->disk->major = _major;
@@ -1379,18 +1375,24 @@ void dm_put(struct mapped_device *md)
 }
 EXPORT_SYMBOL_GPL(dm_put);
 
-static int dm_wait_for_completion(struct mapped_device *md)
+static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
 {
 	int r = 0;
+	DECLARE_WAITQUEUE(wait, current);
+
+	dm_unplug_all(md->queue);
+
+	add_wait_queue(&md->wait, &wait);
 
 	while (1) {
-		set_current_state(TASK_INTERRUPTIBLE);
+		set_current_state(interruptible);
 
 		smp_mb();
 		if (!atomic_read(&md->pending))
 			break;
 
-		if (signal_pending(current)) {
+		if (interruptible == TASK_INTERRUPTIBLE &&
+		    signal_pending(current)) {
 			r = -EINTR;
 			break;
 		}
@@ -1399,67 +1401,40 @@ static int dm_wait_for_completion(struct mapped_device *md)
 	}
 	set_current_state(TASK_RUNNING);
 
+	remove_wait_queue(&md->wait, &wait);
+
 	return r;
 }
 
 /*
  * Process the deferred bios
  */
-static void __flush_deferred_io(struct mapped_device *md)
+static void dm_wq_work(struct work_struct *work)
 {
+	struct mapped_device *md = container_of(work, struct mapped_device,
+						work);
 	struct bio *c;
 
-	while ((c = bio_list_pop(&md->deferred))) {
-		if (__split_bio(md, c))
-			bio_io_error(c);
-	}
-
-	clear_bit(DMF_BLOCK_IO, &md->flags);
-}
+	down_write(&md->io_lock);
 
-static void __merge_pushback_list(struct mapped_device *md)
-{
-	unsigned long flags;
+next_bio:
+	spin_lock_irq(&md->deferred_lock);
+	c = bio_list_pop(&md->deferred);
+	spin_unlock_irq(&md->deferred_lock);
 
-	spin_lock_irqsave(&md->pushback_lock, flags);
-	clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
-	bio_list_merge_head(&md->deferred, &md->pushback);
-	bio_list_init(&md->pushback);
-	spin_unlock_irqrestore(&md->pushback_lock, flags);
-}
+	if (c) {
+		__split_and_process_bio(md, c);
+		goto next_bio;
+	}
 
-static void dm_wq_work(struct work_struct *work)
-{
-	struct dm_wq_req *req = container_of(work, struct dm_wq_req, work);
-	struct mapped_device *md = req->md;
+	clear_bit(DMF_BLOCK_IO, &md->flags);
 
-	down_write(&md->io_lock);
-	switch (req->type) {
-	case DM_WQ_FLUSH_DEFERRED:
-		__flush_deferred_io(md);
-		break;
-	default:
-		DMERR("dm_wq_work: unrecognised work type %d", req->type);
-		BUG();
-	}
 	up_write(&md->io_lock);
 }
 
-static void dm_wq_queue(struct mapped_device *md, int type, void *context,
-			struct dm_wq_req *req)
-{
-	req->type = type;
-	req->md = md;
-	req->context = context;
-	INIT_WORK(&req->work, dm_wq_work);
-	queue_work(md->wq, &req->work);
-}
-
-static void dm_queue_flush(struct mapped_device *md, int type, void *context)
+static void dm_queue_flush(struct mapped_device *md)
 {
-	struct dm_wq_req req;
-
-	dm_wq_queue(md, type, context, &req);
+	queue_work(md->wq, &md->work);
 	flush_workqueue(md->wq);
 }
 
@@ -1534,7 +1509,6 @@ static void unlock_fs(struct mapped_device *md)
 int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 {
 	struct dm_table *map = NULL;
-	DECLARE_WAITQUEUE(wait, current);
 	int r = 0;
 	int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
 	int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
@@ -1584,28 +1558,22 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	down_write(&md->io_lock);
 	set_bit(DMF_BLOCK_IO, &md->flags);
 
-	add_wait_queue(&md->wait, &wait);
 	up_write(&md->io_lock);
 
-	/* unplug */
-	if (map)
-		dm_table_unplug_all(map);
-
 	/*
 	 * Wait for the already-mapped ios to complete.
 	 */
-	r = dm_wait_for_completion(md);
+	r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
 
 	down_write(&md->io_lock);
-	remove_wait_queue(&md->wait, &wait);
 
 	if (noflush)
-		__merge_pushback_list(md);
+		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
 	up_write(&md->io_lock);
 
 	/* were we interrupted ? */
 	if (r < 0) {
-		dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
+		dm_queue_flush(md);
 
 		unlock_fs(md);
 		goto out; /* pushback list is already flushed, so skip flush */
@@ -1645,7 +1613,7 @@ int dm_resume(struct mapped_device *md)
 	if (r)
 		goto out;
 
-	dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
+	dm_queue_flush(md);
 
 	unlock_fs(md);
 
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 20194e0..b48397c 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -60,7 +60,7 @@ int dm_table_barrier_ok(struct dm_table *t);
 int dm_target_init(void);
 void dm_target_exit(void);
 struct target_type *dm_get_target_type(const char *name);
-void dm_put_target_type(struct target_type *t);
+void dm_put_target_type(struct target_type *tt);
 int dm_target_iterate(void (*iter_func)(struct target_type *tt,
 					void *param), void *param);
 
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 86d9adf..8695809 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -62,7 +62,10 @@
 #define	ModeShift	5
 
 #define MaxFault	50
-#include <linux/raid/md.h>
+#include <linux/blkdev.h>
+#include <linux/raid/md_u.h>
+#include "md.h"
+#include <linux/seq_file.h>
 
 
 static void faulty_fail(struct bio *bio, int error)
@@ -280,6 +283,17 @@ static int reconfig(mddev_t *mddev, int layout, int chunk_size)
 	return 0;
 }
 
+static sector_t faulty_size(mddev_t *mddev, sector_t sectors, int raid_disks)
+{
+	WARN_ONCE(raid_disks,
+		  "%s does not support generic reshape\n", __func__);
+
+	if (sectors == 0)
+		return mddev->dev_sectors;
+
+	return sectors;
+}
+
 static int run(mddev_t *mddev)
 {
 	mdk_rdev_t *rdev;
@@ -298,7 +312,7 @@ static int run(mddev_t *mddev)
 	list_for_each_entry(rdev, &mddev->disks, same_set)
 		conf->rdev = rdev;
 
-	mddev->array_sectors = mddev->size * 2;
+	md_set_array_sectors(mddev, faulty_size(mddev, 0, 0));
 	mddev->private = conf;
 
 	reconfig(mddev, mddev->layout, -1);
@@ -325,6 +339,7 @@ static struct mdk_personality faulty_personality =
 	.stop		= stop,
 	.status		= status,
 	.reconfig	= reconfig,
+	.size		= faulty_size,
 };
 
 static int __init raid_init(void)
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 09658b2..7a36e38 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -16,7 +16,11 @@
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  
 */
 
-#include <linux/raid/linear.h>
+#include <linux/blkdev.h>
+#include <linux/raid/md_u.h>
+#include <linux/seq_file.h>
+#include "md.h"
+#include "linear.h"
 
 /*
  * find which device holds a particular offset 
@@ -97,6 +101,16 @@ static int linear_congested(void *data, int bits)
 	return ret;
 }
 
+static sector_t linear_size(mddev_t *mddev, sector_t sectors, int raid_disks)
+{
+	linear_conf_t *conf = mddev_to_conf(mddev);
+
+	WARN_ONCE(sectors || raid_disks,
+		  "%s does not support generic reshape\n", __func__);
+
+	return conf->array_sectors;
+}
+
 static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 {
 	linear_conf_t *conf;
@@ -135,8 +149,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
 			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
-		disk->num_sectors = rdev->size * 2;
-		conf->array_sectors += rdev->size * 2;
+		disk->num_sectors = rdev->sectors;
+		conf->array_sectors += rdev->sectors;
 
 		cnt++;
 	}
@@ -249,7 +263,7 @@ static int linear_run (mddev_t *mddev)
 	if (!conf)
 		return 1;
 	mddev->private = conf;
-	mddev->array_sectors = conf->array_sectors;
+	md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
 
 	blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
 	mddev->queue->unplug_fn = linear_unplug;
@@ -283,7 +297,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
 	newconf->prev = mddev_to_conf(mddev);
 	mddev->private = newconf;
 	mddev->raid_disks++;
-	mddev->array_sectors = newconf->array_sectors;
+	md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
 	set_capacity(mddev->gendisk, mddev->array_sectors);
 	return 0;
 }
@@ -381,6 +395,7 @@ static struct mdk_personality linear_personality =
 	.stop		= linear_stop,
 	.status		= linear_status,
 	.hot_add_disk	= linear_add,
+	.size		= linear_size,
 };
 
 static int __init linear_init (void)
diff --git a/include/linux/raid/linear.h b/drivers/md/linear.h
index f38b9c5..bf81795 100644
--- a/include/linux/raid/linear.h
+++ b/drivers/md/linear.h
@@ -1,8 +1,6 @@
 #ifndef _LINEAR_H
 #define _LINEAR_H
 
-#include <linux/raid/md.h>
-
 struct dev_info {
 	mdk_rdev_t	*rdev;
 	sector_t	num_sectors;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a307f87..ed5727c 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -33,9 +33,9 @@
 */
 
 #include <linux/kthread.h>
-#include <linux/raid/md.h>
-#include <linux/raid/bitmap.h>
+#include <linux/blkdev.h>
 #include <linux/sysctl.h>
+#include <linux/seq_file.h>
 #include <linux/buffer_head.h> /* for invalidate_bdev */
 #include <linux/poll.h>
 #include <linux/ctype.h>
@@ -45,11 +45,10 @@
 #include <linux/reboot.h>
 #include <linux/file.h>
 #include <linux/delay.h>
-
-#define MAJOR_NR MD_MAJOR
-
-/* 63 partitions with the alternate major number (mdp) */
-#define MdpMinorShift 6
+#include <linux/raid/md_p.h>
+#include <linux/raid/md_u.h>
+#include "md.h"
+#include "bitmap.h"
 
 #define DEBUG 0
 #define dprintk(x...) ((void)(DEBUG && printk(x)))
@@ -202,12 +201,68 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
 		)
 
 
-static int md_fail_request(struct request_queue *q, struct bio *bio)
+/* Rather than calling directly into the personality make_request function,
+ * IO requests come here first so that we can check if the device is
+ * being suspended pending a reconfiguration.
+ * We hold a refcount over the call to ->make_request.  By the time that
+ * call has finished, the bio has been linked into some internal structure
+ * and so is visible to ->quiesce(), so we don't need the refcount any more.
+ */
+static int md_make_request(struct request_queue *q, struct bio *bio)
 {
-	bio_io_error(bio);
-	return 0;
+	mddev_t *mddev = q->queuedata;
+	int rv;
+	if (mddev == NULL || mddev->pers == NULL) {
+		bio_io_error(bio);
+		return 0;
+	}
+	rcu_read_lock();
+	if (mddev->suspended) {
+		DEFINE_WAIT(__wait);
+		for (;;) {
+			prepare_to_wait(&mddev->sb_wait, &__wait,
+					TASK_UNINTERRUPTIBLE);
+			if (!mddev->suspended)
+				break;
+			rcu_read_unlock();
+			schedule();
+			rcu_read_lock();
+		}
+		finish_wait(&mddev->sb_wait, &__wait);
+	}
+	atomic_inc(&mddev->active_io);
+	rcu_read_unlock();
+	rv = mddev->pers->make_request(q, bio);
+	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
+		wake_up(&mddev->sb_wait);
+
+	return rv;
+}
+
+static void mddev_suspend(mddev_t *mddev)
+{
+	BUG_ON(mddev->suspended);
+	mddev->suspended = 1;
+	synchronize_rcu();
+	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
+	mddev->pers->quiesce(mddev, 1);
+	md_unregister_thread(mddev->thread);
+	mddev->thread = NULL;
+	/* we now know that no code is executing in the personality module,
+	 * except possibly the tail end of a ->bi_end_io function, but that
+	 * is certain to complete before the module has a chance to get
+	 * unloaded
+	 */
+}
+
+static void mddev_resume(mddev_t *mddev)
+{
+	mddev->suspended = 0;
+	wake_up(&mddev->sb_wait);
+	mddev->pers->quiesce(mddev, 0);
 }
 
+
 static inline mddev_t *mddev_get(mddev_t *mddev)
 {
 	atomic_inc(&mddev->active);
@@ -310,6 +365,7 @@ static mddev_t * mddev_find(dev_t unit)
 	init_timer(&new->safemode_timer);
 	atomic_set(&new->active, 1);
 	atomic_set(&new->openers, 0);
+	atomic_set(&new->active_io, 0);
 	spin_lock_init(&new->write_lock);
 	init_waitqueue_head(&new->sb_wait);
 	init_waitqueue_head(&new->recovery_wait);
@@ -326,6 +382,11 @@ static inline int mddev_lock(mddev_t * mddev)
 	return mutex_lock_interruptible(&mddev->reconfig_mutex);
 }
 
+static inline int mddev_is_locked(mddev_t *mddev)
+{
+	return mutex_is_locked(&mddev->reconfig_mutex);
+}
+
 static inline int mddev_trylock(mddev_t * mddev)
 {
 	return mutex_trylock(&mddev->reconfig_mutex);
@@ -409,7 +470,7 @@ static void free_disk_sb(mdk_rdev_t * rdev)
 		rdev->sb_loaded = 0;
 		rdev->sb_page = NULL;
 		rdev->sb_start = 0;
-		rdev->size = 0;
+		rdev->sectors = 0;
 	}
 }
 
@@ -775,9 +836,9 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
 		else 
 			ret = 0;
 	}
-	rdev->size = calc_num_sectors(rdev, sb->chunk_size) / 2;
+	rdev->sectors = calc_num_sectors(rdev, sb->chunk_size);
 
-	if (rdev->size < sb->size && sb->level > 1)
+	if (rdev->sectors < sb->size * 2 && sb->level > 1)
 		/* "this cannot possibly happen" ... */
 		ret = -EINVAL;
 
@@ -812,7 +873,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		mddev->clevel[0] = 0;
 		mddev->layout = sb->layout;
 		mddev->raid_disks = sb->raid_disks;
-		mddev->size = sb->size;
+		mddev->dev_sectors = sb->size * 2;
 		mddev->events = ev1;
 		mddev->bitmap_offset = 0;
 		mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
@@ -926,7 +987,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 
 	sb->ctime = mddev->ctime;
 	sb->level = mddev->level;
-	sb->size  = mddev->size;
+	sb->size = mddev->dev_sectors / 2;
 	sb->raid_disks = mddev->raid_disks;
 	sb->md_minor = mddev->md_minor;
 	sb->not_persistent = 0;
@@ -1024,7 +1085,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 static unsigned long long
 super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
 {
-	if (num_sectors && num_sectors < rdev->mddev->size * 2)
+	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
 		return 0; /* component must fit device */
 	if (rdev->mddev->bitmap_offset)
 		return 0; /* can't move bitmap */
@@ -1180,16 +1241,17 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 			ret = 0;
 	}
 	if (minor_version)
-		rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2;
+		rdev->sectors = (rdev->bdev->bd_inode->i_size >> 9) -
+			le64_to_cpu(sb->data_offset);
 	else
-		rdev->size = rdev->sb_start / 2;
-	if (rdev->size < le64_to_cpu(sb->data_size)/2)
+		rdev->sectors = rdev->sb_start;
+	if (rdev->sectors < le64_to_cpu(sb->data_size))
 		return -EINVAL;
-	rdev->size = le64_to_cpu(sb->data_size)/2;
+	rdev->sectors = le64_to_cpu(sb->data_size);
 	if (le32_to_cpu(sb->chunksize))
-		rdev->size &= ~((sector_t)le32_to_cpu(sb->chunksize)/2 - 1);
+		rdev->sectors &= ~((sector_t)le32_to_cpu(sb->chunksize) - 1);
 
-	if (le64_to_cpu(sb->size) > rdev->size*2)
+	if (le64_to_cpu(sb->size) > rdev->sectors)
 		return -EINVAL;
 	return ret;
 }
@@ -1216,7 +1278,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		mddev->clevel[0] = 0;
 		mddev->layout = le32_to_cpu(sb->layout);
 		mddev->raid_disks = le32_to_cpu(sb->raid_disks);
-		mddev->size = le64_to_cpu(sb->size)/2;
+		mddev->dev_sectors = le64_to_cpu(sb->size);
 		mddev->events = ev1;
 		mddev->bitmap_offset = 0;
 		mddev->default_bitmap_offset = 1024 >> 9;
@@ -1312,7 +1374,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 	sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
 
 	sb->raid_disks = cpu_to_le32(mddev->raid_disks);
-	sb->size = cpu_to_le64(mddev->size<<1);
+	sb->size = cpu_to_le64(mddev->dev_sectors);
 
 	if (mddev->bitmap && mddev->bitmap_file == NULL) {
 		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
@@ -1320,10 +1382,15 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 	}
 
 	if (rdev->raid_disk >= 0 &&
-	    !test_bit(In_sync, &rdev->flags) &&
-	    rdev->recovery_offset > 0) {
-		sb->feature_map |= cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
-		sb->recovery_offset = cpu_to_le64(rdev->recovery_offset);
+	    !test_bit(In_sync, &rdev->flags)) {
+		if (mddev->curr_resync_completed > rdev->recovery_offset)
+			rdev->recovery_offset = mddev->curr_resync_completed;
+		if (rdev->recovery_offset > 0) {
+			sb->feature_map |=
+				cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
+			sb->recovery_offset =
+				cpu_to_le64(rdev->recovery_offset);
+		}
 	}
 
 	if (mddev->reshape_position != MaxSector) {
@@ -1365,7 +1432,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
 {
 	struct mdp_superblock_1 *sb;
 	sector_t max_sectors;
-	if (num_sectors && num_sectors < rdev->mddev->size * 2)
+	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
 		return 0; /* component must fit device */
 	if (rdev->sb_start < rdev->data_offset) {
 		/* minor versions 1 and 2; superblock before data */
@@ -1381,7 +1448,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
 		sector_t sb_start;
 		sb_start = (rdev->bdev->bd_inode->i_size >> 9) - 8*2;
 		sb_start &= ~(sector_t)(4*2 - 1);
-		max_sectors = rdev->size * 2 + sb_start - rdev->sb_start;
+		max_sectors = rdev->sectors + sb_start - rdev->sb_start;
 		if (!num_sectors || num_sectors > max_sectors)
 			num_sectors = max_sectors;
 		rdev->sb_start = sb_start;
@@ -1433,6 +1500,38 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
 
 static LIST_HEAD(pending_raid_disks);
 
+static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev)
+{
+	struct mdk_personality *pers = mddev->pers;
+	struct gendisk *disk = mddev->gendisk;
+	struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
+	struct blk_integrity *bi_mddev = blk_get_integrity(disk);
+
+	/* Data integrity passthrough not supported on RAID 4, 5 and 6 */
+	if (pers && pers->level >= 4 && pers->level <= 6)
+		return;
+
+	/* If rdev is integrity capable, register profile for mddev */
+	if (!bi_mddev && bi_rdev) {
+		if (blk_integrity_register(disk, bi_rdev))
+			printk(KERN_ERR "%s: %s Could not register integrity!\n",
+			       __func__, disk->disk_name);
+		else
+			printk(KERN_NOTICE "Enabling data integrity on %s\n",
+			       disk->disk_name);
+		return;
+	}
+
+	/* Check that mddev and rdev have matching profiles */
+	if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) {
+		printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__,
+		       disk->disk_name, rdev->bdev->bd_disk->disk_name);
+		printk(KERN_NOTICE "Disabling data integrity on %s\n",
+		       disk->disk_name);
+		blk_integrity_unregister(disk);
+	}
+}
+
 static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 {
 	char b[BDEVNAME_SIZE];
@@ -1449,8 +1548,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 	if (find_rdev(mddev, rdev->bdev->bd_dev))
 		return -EEXIST;
 
-	/* make sure rdev->size exceeds mddev->size */
-	if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) {
+	/* make sure rdev->sectors is not smaller than mddev->dev_sectors */
+	if (rdev->sectors && (mddev->dev_sectors == 0 ||
+			rdev->sectors < mddev->dev_sectors)) {
 		if (mddev->pers) {
 			/* Cannot change size, so fail
 			 * If mddev->level <= 0, then we don't care
@@ -1459,7 +1559,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 			if (mddev->level > 0)
 				return -ENOSPC;
 		} else
-			mddev->size = rdev->size;
+			mddev->dev_sectors = rdev->sectors;
 	}
 
 	/* Verify rdev->desc_nr is unique.
@@ -1503,6 +1603,8 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 
 	/* May as well allow recovery to be retried once */
 	mddev->recovery_disabled = 0;
+
+	md_integrity_check(rdev, mddev);
 	return 0;
 
  fail:
@@ -1713,8 +1815,8 @@ static void print_sb_1(struct mdp_superblock_1 *sb)
 static void print_rdev(mdk_rdev_t *rdev, int major_version)
 {
 	char b[BDEVNAME_SIZE];
-	printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n",
-		bdevname(rdev->bdev,b), (unsigned long long)rdev->size,
+	printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n",
+		bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors,
 	        test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
 	        rdev->desc_nr);
 	if (rdev->sb_loaded) {
@@ -2153,7 +2255,7 @@ offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 		return -EINVAL;
 	if (rdev->mddev->pers && rdev->raid_disk >= 0)
 		return -EBUSY;
-	if (rdev->size && rdev->mddev->external)
+	if (rdev->sectors && rdev->mddev->external)
 		/* Must set offset before size, so overlap checks
 		 * can be sane */
 		return -EBUSY;
@@ -2167,7 +2269,7 @@ __ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
 static ssize_t
 rdev_size_show(mdk_rdev_t *rdev, char *page)
 {
-	return sprintf(page, "%llu\n", (unsigned long long)rdev->size);
+	return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
 }
 
 static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
@@ -2180,34 +2282,52 @@ static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
 	return 1;
 }
 
+static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
+{
+	unsigned long long blocks;
+	sector_t new;
+
+	if (strict_strtoull(buf, 10, &blocks) < 0)
+		return -EINVAL;
+
+	if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
+		return -EINVAL; /* sector conversion overflow */
+
+	new = blocks * 2;
+	if (new != blocks * 2)
+		return -EINVAL; /* unsigned long long to sector_t overflow */
+
+	*sectors = new;
+	return 0;
+}
+
 static ssize_t
 rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 {
-	unsigned long long size;
-	unsigned long long oldsize = rdev->size;
 	mddev_t *my_mddev = rdev->mddev;
+	sector_t oldsectors = rdev->sectors;
+	sector_t sectors;
 
-	if (strict_strtoull(buf, 10, &size) < 0)
+	if (strict_blocks_to_sectors(buf, &sectors) < 0)
 		return -EINVAL;
 	if (my_mddev->pers && rdev->raid_disk >= 0) {
 		if (my_mddev->persistent) {
-			size = super_types[my_mddev->major_version].
-				rdev_size_change(rdev, size * 2);
-			if (!size)
+			sectors = super_types[my_mddev->major_version].
+				rdev_size_change(rdev, sectors);
+			if (!sectors)
 				return -EBUSY;
-		} else if (!size) {
-			size = (rdev->bdev->bd_inode->i_size >> 10);
-			size -= rdev->data_offset/2;
-		}
+		} else if (!sectors)
+			sectors = (rdev->bdev->bd_inode->i_size >> 9) -
+				rdev->data_offset;
 	}
-	if (size < my_mddev->size)
+	if (sectors < my_mddev->dev_sectors)
 		return -EINVAL; /* component must fit device */
 
-	rdev->size = size;
-	if (size > oldsize && my_mddev->external) {
+	rdev->sectors = sectors;
+	if (sectors > oldsectors && my_mddev->external) {
 		/* need to check that all other rdevs with the same ->bdev
 		 * do not overlap.  We need to unlock the mddev to avoid
-		 * a deadlock.  We have already changed rdev->size, and if
+		 * a deadlock.  We have already changed rdev->sectors, and if
 		 * we have to change it back, we will have the lock again.
 		 */
 		mddev_t *mddev;
@@ -2223,9 +2343,9 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 				if (test_bit(AllReserved, &rdev2->flags) ||
 				    (rdev->bdev == rdev2->bdev &&
 				     rdev != rdev2 &&
-				     overlaps(rdev->data_offset, rdev->size * 2,
+				     overlaps(rdev->data_offset, rdev->sectors,
 					      rdev2->data_offset,
-					      rdev2->size * 2))) {
+					      rdev2->sectors))) {
 					overlap = 1;
 					break;
 				}
@@ -2239,11 +2359,11 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 		if (overlap) {
 			/* Someone else could have slipped in a size
 			 * change here, but doing so is just silly.
-			 * We put oldsize back because we *know* it is
+			 * We put oldsectors back because we *know* it is
 			 * safe, and trust userspace not to race with
 			 * itself
 			 */
-			rdev->size = oldsize;
+			rdev->sectors = oldsectors;
 			return -EBUSY;
 		}
 	}
@@ -2547,18 +2667,101 @@ level_show(mddev_t *mddev, char *page)
 static ssize_t
 level_store(mddev_t *mddev, const char *buf, size_t len)
 {
+	char level[16];
 	ssize_t rv = len;
-	if (mddev->pers)
+	struct mdk_personality *pers;
+	void *priv;
+
+	if (mddev->pers == NULL) {
+		if (len == 0)
+			return 0;
+		if (len >= sizeof(mddev->clevel))
+			return -ENOSPC;
+		strncpy(mddev->clevel, buf, len);
+		if (mddev->clevel[len-1] == '\n')
+			len--;
+		mddev->clevel[len] = 0;
+		mddev->level = LEVEL_NONE;
+		return rv;
+	}
+
+	/* request to change the personality.  Need to ensure:
+	 *  - array is not engaged in resync/recovery/reshape
+	 *  - old personality can be suspended
+	 *  - new personality will accept the array.
+	 */
+
+	if (mddev->sync_thread || mddev->reshape_position != MaxSector)
 		return -EBUSY;
-	if (len == 0)
-		return 0;
-	if (len >= sizeof(mddev->clevel))
-		return -ENOSPC;
-	strncpy(mddev->clevel, buf, len);
-	if (mddev->clevel[len-1] == '\n')
+
+	if (!mddev->pers->quiesce) {
+		printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
+		       mdname(mddev), mddev->pers->name);
+		return -EINVAL;
+	}
+
+	/* Now find the new personality */
+	if (len == 0 || len >= sizeof(level))
+		return -EINVAL;
+	strncpy(level, buf, len);
+	if (level[len-1] == '\n')
 		len--;
-	mddev->clevel[len] = 0;
-	mddev->level = LEVEL_NONE;
+	level[len] = 0;
+
+	request_module("md-%s", level);
+	spin_lock(&pers_lock);
+	pers = find_pers(LEVEL_NONE, level);
+	if (!pers || !try_module_get(pers->owner)) {
+		spin_unlock(&pers_lock);
+		printk(KERN_WARNING "md: personality %s not loaded\n", level);
+		return -EINVAL;
+	}
+	spin_unlock(&pers_lock);
+
+	if (pers == mddev->pers) {
+		/* Nothing to do! */
+		module_put(pers->owner);
+		return rv;
+	}
+	if (!pers->takeover) {
+		module_put(pers->owner);
+		printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
+		       mdname(mddev), level);
+		return -EINVAL;
+	}
+
+	/* ->takeover must set new_* and/or delta_disks
+	 * if it succeeds, and may set them when it fails.
+	 */
+	priv = pers->takeover(mddev);
+	if (IS_ERR(priv)) {
+		mddev->new_level = mddev->level;
+		mddev->new_layout = mddev->layout;
+		mddev->new_chunk = mddev->chunk_size;
+		mddev->raid_disks -= mddev->delta_disks;
+		mddev->delta_disks = 0;
+		module_put(pers->owner);
+		printk(KERN_WARNING "md: %s: %s would not accept array\n",
+		       mdname(mddev), level);
+		return PTR_ERR(priv);
+	}
+
+	/* Looks like we have a winner */
+	mddev_suspend(mddev);
+	mddev->pers->stop(mddev);
+	module_put(mddev->pers->owner);
+	mddev->pers = pers;
+	mddev->private = priv;
+	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
+	mddev->level = mddev->new_level;
+	mddev->layout = mddev->new_layout;
+	mddev->chunk_size = mddev->new_chunk;
+	mddev->delta_disks = 0;
+	pers->run(mddev);
+	mddev_resume(mddev);
+	set_bit(MD_CHANGE_DEVS, &mddev->flags);
+	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+	md_wakeup_thread(mddev->thread);
 	return rv;
 }
 
@@ -2586,12 +2789,18 @@ layout_store(mddev_t *mddev, const char *buf, size_t len)
 	if (!*buf || (*e && *e != '\n'))
 		return -EINVAL;
 
-	if (mddev->pers)
-		return -EBUSY;
-	if (mddev->reshape_position != MaxSector)
+	if (mddev->pers) {
+		int err;
+		if (mddev->pers->reconfig == NULL)
+			return -EBUSY;
+		err = mddev->pers->reconfig(mddev, n, -1);
+		if (err)
+			return err;
+	} else {
 		mddev->new_layout = n;
-	else
-		mddev->layout = n;
+		if (mddev->reshape_position == MaxSector)
+			mddev->layout = n;
+	}
 	return len;
 }
 static struct md_sysfs_entry md_layout =
@@ -2648,19 +2857,24 @@ chunk_size_show(mddev_t *mddev, char *page)
 static ssize_t
 chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
 {
-	/* can only set chunk_size if array is not yet active */
 	char *e;
 	unsigned long n = simple_strtoul(buf, &e, 10);
 
 	if (!*buf || (*e && *e != '\n'))
 		return -EINVAL;
 
-	if (mddev->pers)
-		return -EBUSY;
-	else if (mddev->reshape_position != MaxSector)
+	if (mddev->pers) {
+		int err;
+		if (mddev->pers->reconfig == NULL)
+			return -EBUSY;
+		err = mddev->pers->reconfig(mddev, -1, n);
+		if (err)
+			return err;
+	} else {
 		mddev->new_chunk = n;
-	else
-		mddev->chunk_size = n;
+		if (mddev->reshape_position == MaxSector)
+			mddev->chunk_size = n;
+	}
 	return len;
 }
 static struct md_sysfs_entry md_chunk_size =
@@ -2669,6 +2883,8 @@ __ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
 static ssize_t
 resync_start_show(mddev_t *mddev, char *page)
 {
+	if (mddev->recovery_cp == MaxSector)
+		return sprintf(page, "none\n");
 	return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
 }
 
@@ -2766,7 +2982,7 @@ array_state_show(mddev_t *mddev, char *page)
 	else {
 		if (list_empty(&mddev->disks) &&
 		    mddev->raid_disks == 0 &&
-		    mddev->size == 0)
+		    mddev->dev_sectors == 0)
 			st = clear;
 		else
 			st = inactive;
@@ -2973,7 +3189,8 @@ __ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
 static ssize_t
 size_show(mddev_t *mddev, char *page)
 {
-	return sprintf(page, "%llu\n", (unsigned long long)mddev->size);
+	return sprintf(page, "%llu\n",
+		(unsigned long long)mddev->dev_sectors / 2);
 }
 
 static int update_size(mddev_t *mddev, sector_t num_sectors);
@@ -2985,20 +3202,18 @@ size_store(mddev_t *mddev, const char *buf, size_t len)
 	 * not increase it (except from 0).
 	 * If array is active, we can try an on-line resize
 	 */
-	char *e;
-	int err = 0;
-	unsigned long long size = simple_strtoull(buf, &e, 10);
-	if (!*buf || *buf == '\n' ||
-	    (*e && *e != '\n'))
-		return -EINVAL;
+	sector_t sectors;
+	int err = strict_blocks_to_sectors(buf, &sectors);
 
+	if (err < 0)
+		return err;
 	if (mddev->pers) {
-		err = update_size(mddev, size * 2);
+		err = update_size(mddev, sectors);
 		md_update_sb(mddev, 1);
 	} else {
-		if (mddev->size == 0 ||
-		    mddev->size > size)
-			mddev->size = size;
+		if (mddev->dev_sectors == 0 ||
+		    mddev->dev_sectors > sectors)
+			mddev->dev_sectors = sectors;
 		else
 			err = -ENOSPC;
 	}
@@ -3251,6 +3466,8 @@ static ssize_t
 sync_speed_show(mddev_t *mddev, char *page)
 {
 	unsigned long resync, dt, db;
+	if (mddev->curr_resync == 0)
+		return sprintf(page, "none\n");
 	resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
 	dt = (jiffies - mddev->resync_mark) / HZ;
 	if (!dt) dt++;
@@ -3263,15 +3480,15 @@ static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
 static ssize_t
 sync_completed_show(mddev_t *mddev, char *page)
 {
-	unsigned long max_blocks, resync;
+	unsigned long long max_sectors, resync;	/* may exceed long */
 
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
-		max_blocks = mddev->resync_max_sectors;
+		max_sectors = mddev->resync_max_sectors;
 	else
-		max_blocks = mddev->size << 1;
+		max_sectors = mddev->dev_sectors;
 
 	resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active));
-	return sprintf(page, "%lu / %lu\n", resync, max_blocks);
+	return sprintf(page, "%llu / %llu\n", resync, max_sectors);
 }
 
 static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
@@ -3431,6 +3648,57 @@ static struct md_sysfs_entry md_reshape_position =
 __ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
        reshape_position_store);
 
+static ssize_t
+array_size_show(mddev_t *mddev, char *page)
+{
+	if (mddev->external_size)
+		return sprintf(page, "%llu\n",
+			       (unsigned long long)mddev->array_sectors/2);
+	else
+		return sprintf(page, "default\n");
+}
+
+static ssize_t
+array_size_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	sector_t sectors;
+
+	if (strncmp(buf, "default", 7) == 0) {
+		if (mddev->pers)
+			sectors = mddev->pers->size(mddev, 0, 0);
+		else
+			sectors = mddev->array_sectors;
+
+		mddev->external_size = 0;
+	} else {
+		if (strict_blocks_to_sectors(buf, &sectors) < 0)
+			return -EINVAL;
+		if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
+			return -EINVAL;
+
+		mddev->external_size = 1;
+	}
+
+	mddev->array_sectors = sectors;
+	set_capacity(mddev->gendisk, mddev->array_sectors);
+	if (mddev->pers) {
+		struct block_device *bdev = bdget_disk(mddev->gendisk, 0);
+
+		if (bdev) {
+			mutex_lock(&bdev->bd_inode->i_mutex);
+			i_size_write(bdev->bd_inode,
+				     (loff_t)mddev->array_sectors << 9);
+			mutex_unlock(&bdev->bd_inode->i_mutex);
+			bdput(bdev);
+		}
+	}
+
+	return len;
+}
+
+static struct md_sysfs_entry md_array_size =
+__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
+       array_size_store);
 
 static struct attribute *md_default_attrs[] = {
 	&md_level.attr,
@@ -3444,6 +3712,7 @@ static struct attribute *md_default_attrs[] = {
 	&md_safe_delay.attr,
 	&md_array_state.attr,
 	&md_reshape_position.attr,
+	&md_array_size.attr,
 	NULL,
 };
 
@@ -3602,10 +3871,12 @@ static int md_alloc(dev_t dev, char *name)
 		mddev_put(mddev);
 		return -ENOMEM;
 	}
+	mddev->queue->queuedata = mddev;
+
 	/* Can be unlocked because the queue is new: no concurrency */
 	queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
 
-	blk_queue_make_request(mddev->queue, md_fail_request);
+	blk_queue_make_request(mddev->queue, md_make_request);
 
 	disk = alloc_disk(1 << shift);
 	if (!disk) {
@@ -3731,13 +4002,13 @@ static int do_md_run(mddev_t * mddev)
 		list_for_each_entry(rdev, &mddev->disks, same_set) {
 			if (test_bit(Faulty, &rdev->flags))
 				continue;
-			if (rdev->size < chunk_size / 1024) {
+			if (rdev->sectors < chunk_size / 512) {
 				printk(KERN_WARNING
 					"md: Dev %s smaller than chunk_size:"
-					" %lluk < %dk\n",
+					" %llu < %d\n",
 					bdevname(rdev->bdev,b),
-					(unsigned long long)rdev->size,
-					chunk_size / 1024);
+					(unsigned long long)rdev->sectors,
+					chunk_size / 512);
 				return -EINVAL;
 			}
 		}
@@ -3761,11 +4032,11 @@ static int do_md_run(mddev_t * mddev)
 
 		/* perform some consistency tests on the device.
 		 * We don't want the data to overlap the metadata,
-		 * Internal Bitmap issues has handled elsewhere.
+		 * Internal Bitmap issues have been handled elsewhere.
 		 */
 		if (rdev->data_offset < rdev->sb_start) {
-			if (mddev->size &&
-			    rdev->data_offset + mddev->size*2
+			if (mddev->dev_sectors &&
+			    rdev->data_offset + mddev->dev_sectors
 			    > rdev->sb_start) {
 				printk("md: %s: data overlaps metadata\n",
 				       mdname(mddev));
@@ -3801,9 +4072,16 @@ static int do_md_run(mddev_t * mddev)
 	}
 	mddev->pers = pers;
 	spin_unlock(&pers_lock);
-	mddev->level = pers->level;
+	if (mddev->level != pers->level) {
+		mddev->level = pers->level;
+		mddev->new_level = pers->level;
+	}
 	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
 
+	if (pers->level >= 4 && pers->level <= 6)
+		/* Cannot support integrity (yet) */
+		blk_integrity_unregister(mddev->gendisk);
+
 	if (mddev->reshape_position != MaxSector &&
 	    pers->start_reshape == NULL) {
 		/* This personality cannot handle reshaping... */
@@ -3843,7 +4121,9 @@ static int do_md_run(mddev_t * mddev)
 	}
 
 	mddev->recovery = 0;
-	mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
+	/* may be over-ridden by personality */
+	mddev->resync_max_sectors = mddev->dev_sectors;
+
 	mddev->barriers_work = 1;
 	mddev->ok_start_degraded = start_dirty_degraded;
 
@@ -3853,7 +4133,17 @@ static int do_md_run(mddev_t * mddev)
 	err = mddev->pers->run(mddev);
 	if (err)
 		printk(KERN_ERR "md: pers->run() failed ...\n");
-	else if (mddev->pers->sync_request) {
+	else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) {
+		WARN_ONCE(!mddev->external_size, "%s: default size too small,"
+			  " but 'external_size' not in effect?\n", __func__);
+		printk(KERN_ERR
+		       "md: invalid array_size %llu > default size %llu\n",
+		       (unsigned long long)mddev->array_sectors / 2,
+		       (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2);
+		err = -EINVAL;
+		mddev->pers->stop(mddev);
+	}
+	if (err == 0 && mddev->pers->sync_request) {
 		err = bitmap_create(mddev);
 		if (err) {
 			printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
@@ -3899,16 +4189,6 @@ static int do_md_run(mddev_t * mddev)
 
 	set_capacity(disk, mddev->array_sectors);
 
-	/* If we call blk_queue_make_request here, it will
-	 * re-initialise max_sectors etc which may have been
-	 * refined inside -> run.  So just set the bits we need to set.
-	 * Most initialisation happended when we called
-	 * blk_queue_make_request(..., md_fail_request)
-	 * earlier.
-	 */
-	mddev->queue->queuedata = mddev;
-	mddev->queue->make_request_fn = mddev->pers->make_request;
-
 	/* If there is a partially-recovered drive we need to
 	 * start recovery here.  If we leave it to md_check_recovery,
 	 * it will remove the drives and not do the right thing
@@ -4038,7 +4318,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
 			md_super_wait(mddev);
 			if (mddev->ro)
 				set_disk_ro(disk, 0);
-			blk_queue_make_request(mddev->queue, md_fail_request);
+
 			mddev->pers->stop(mddev);
 			mddev->queue->merge_bvec_fn = NULL;
 			mddev->queue->unplug_fn = NULL;
@@ -4095,7 +4375,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
 		export_array(mddev);
 
 		mddev->array_sectors = 0;
-		mddev->size = 0;
+		mddev->external_size = 0;
+		mddev->dev_sectors = 0;
 		mddev->raid_disks = 0;
 		mddev->recovery_cp = 0;
 		mddev->resync_min = 0;
@@ -4135,6 +4416,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
 		printk(KERN_INFO "md: %s switched to read-only mode.\n",
 			mdname(mddev));
 	err = 0;
+	blk_integrity_unregister(disk);
 	md_new_event(mddev);
 	sysfs_notify_dirent(mddev->sysfs_state);
 out:
@@ -4300,8 +4582,8 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
 	info.patch_version = MD_PATCHLEVEL_VERSION;
 	info.ctime         = mddev->ctime;
 	info.level         = mddev->level;
-	info.size          = mddev->size;
-	if (info.size != mddev->size) /* overflow */
+	info.size          = mddev->dev_sectors / 2;
+	if (info.size != mddev->dev_sectors / 2) /* overflow */
 		info.size = -1;
 	info.nr_disks      = nr;
 	info.raid_disks    = mddev->raid_disks;
@@ -4480,6 +4762,8 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
 		clear_bit(In_sync, &rdev->flags); /* just to be sure */
 		if (info->state & (1<<MD_DISK_WRITEMOSTLY))
 			set_bit(WriteMostly, &rdev->flags);
+		else
+			clear_bit(WriteMostly, &rdev->flags);
 
 		rdev->raid_disk = -1;
 		err = bind_rdev_to_array(rdev, mddev);
@@ -4543,7 +4827,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
 			rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
 		} else 
 			rdev->sb_start = calc_dev_sboffset(rdev->bdev);
-		rdev->size = calc_num_sectors(rdev, mddev->chunk_size) / 2;
+		rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size);
 
 		err = bind_rdev_to_array(rdev, mddev);
 		if (err) {
@@ -4613,7 +4897,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
 	else
 		rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
 
-	rdev->size = calc_num_sectors(rdev, mddev->chunk_size) / 2;
+	rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size);
 
 	if (test_bit(Faulty, &rdev->flags)) {
 		printk(KERN_WARNING 
@@ -4749,7 +5033,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
 
 	mddev->level         = info->level;
 	mddev->clevel[0]     = 0;
-	mddev->size          = info->size;
+	mddev->dev_sectors   = 2 * (sector_t)info->size;
 	mddev->raid_disks    = info->raid_disks;
 	/* don't set md_minor, it is determined by which /dev/md* was
 	 * openned
@@ -4788,6 +5072,17 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
 	return 0;
 }
 
+void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors)
+{
+	WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
+	/* When external_size is set the stored array_sectors is left
+	 * alone; the caller's value is only recorded otherwise.
+	 */
+	if (!mddev->external_size)
+		mddev->array_sectors = array_sectors;
+}
+EXPORT_SYMBOL(md_set_array_sectors);
+
 static int update_size(mddev_t *mddev, sector_t num_sectors)
 {
 	mdk_rdev_t *rdev;
@@ -4814,8 +5109,7 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
 		 */
 		return -EBUSY;
 	list_for_each_entry(rdev, &mddev->disks, same_set) {
-		sector_t avail;
-		avail = rdev->size * 2;
+		sector_t avail = rdev->sectors;
 
 		if (fit && (num_sectors == 0 || num_sectors > avail))
 			num_sectors = avail;
@@ -4887,12 +5181,18 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
 		)
 		return -EINVAL;
 	/* Check there is only one change */
-	if (info->size >= 0 && mddev->size != info->size) cnt++;
-	if (mddev->raid_disks != info->raid_disks) cnt++;
-	if (mddev->layout != info->layout) cnt++;
-	if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) cnt++;
-	if (cnt == 0) return 0;
-	if (cnt > 1) return -EINVAL;
+	if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
+		cnt++;
+	if (mddev->raid_disks != info->raid_disks)
+		cnt++;
+	if (mddev->layout != info->layout)
+		cnt++;
+	if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
+		cnt++;
+	if (cnt == 0)
+		return 0;
+	if (cnt > 1)
+		return -EINVAL;
 
 	if (mddev->layout != info->layout) {
 		/* Change layout
@@ -4904,7 +5204,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
 		else
 			return mddev->pers->reconfig(mddev, info->layout, -1);
 	}
-	if (info->size >= 0 && mddev->size != info->size)
+	if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
 		rv = update_size(mddev, (sector_t)info->size * 2);
 
 	if (mddev->raid_disks    != info->raid_disks)
@@ -5331,6 +5631,8 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
 
 void md_unregister_thread(mdk_thread_t *thread)
 {
+	if (!thread)
+		return;
 	dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
 
 	kthread_stop(thread->tsk);
@@ -5404,7 +5706,7 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
 		max_blocks = mddev->resync_max_sectors >> 1;
 	else
-		max_blocks = mddev->size;
+		max_blocks = mddev->dev_sectors / 2;
 
 	/*
 	 * Should not happen.
@@ -5537,7 +5839,7 @@ struct mdstat_info {
 static int md_seq_show(struct seq_file *seq, void *v)
 {
 	mddev_t *mddev = v;
-	sector_t size;
+	sector_t sectors;
 	mdk_rdev_t *rdev;
 	struct mdstat_info *mi = seq->private;
 	struct bitmap *bitmap;
@@ -5573,7 +5875,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
 			seq_printf(seq, " %s", mddev->pers->name);
 		}
 
-		size = 0;
+		sectors = 0;
 		list_for_each_entry(rdev, &mddev->disks, same_set) {
 			char b[BDEVNAME_SIZE];
 			seq_printf(seq, " %s[%d]",
@@ -5585,7 +5887,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
 				continue;
 			} else if (rdev->raid_disk < 0)
 				seq_printf(seq, "(S)"); /* spare */
-			size += rdev->size;
+			sectors += rdev->sectors;
 		}
 
 		if (!list_empty(&mddev->disks)) {
@@ -5595,7 +5897,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
 					   mddev->array_sectors / 2);
 			else
 				seq_printf(seq, "\n      %llu blocks",
-					   (unsigned long long)size);
+					   (unsigned long long)sectors / 2);
 		}
 		if (mddev->persistent) {
 			if (mddev->major_version != 0 ||
@@ -5722,19 +6024,19 @@ int unregister_md_personality(struct mdk_personality *p)
 	return 0;
 }
 
-static int is_mddev_idle(mddev_t *mddev)
+static int is_mddev_idle(mddev_t *mddev, int init)
 {
 	mdk_rdev_t * rdev;
 	int idle;
-	long curr_events;
+	int curr_events;
 
 	idle = 1;
 	rcu_read_lock();
 	rdev_for_each_rcu(rdev, mddev) {
 		struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
-		curr_events = part_stat_read(&disk->part0, sectors[0]) +
-				part_stat_read(&disk->part0, sectors[1]) -
-				atomic_read(&disk->sync_io);
+		curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
+			      (int)part_stat_read(&disk->part0, sectors[1]) -
+			      atomic_read(&disk->sync_io);
 		/* sync IO will cause sync_io to increase before the disk_stats
 		 * as sync_io is counted when a request starts, and
 		 * disk_stats is counted when it completes.
@@ -5757,7 +6059,7 @@ static int is_mddev_idle(mddev_t *mddev)
 		 * always make curr_events less than last_events.
 		 *
 		 */
-		if (curr_events - rdev->last_events > 4096) {
+		if (init || curr_events - rdev->last_events > 64) {
 			rdev->last_events = curr_events;
 			idle = 0;
 		}
@@ -5980,10 +6282,10 @@ void md_do_sync(mddev_t *mddev)
 			j = mddev->recovery_cp;
 
 	} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
-		max_sectors = mddev->size << 1;
+		max_sectors = mddev->dev_sectors;
 	else {
 		/* recovery follows the physical size of devices */
-		max_sectors = mddev->size << 1;
+		max_sectors = mddev->dev_sectors;
 		j = MaxSector;
 		list_for_each_entry(rdev, &mddev->disks, same_set)
 			if (rdev->raid_disk >= 0 &&
@@ -6000,7 +6302,7 @@ void md_do_sync(mddev_t *mddev)
 	       "(but not more than %d KB/sec) for %s.\n",
 	       speed_max(mddev), desc);
 
-	is_mddev_idle(mddev); /* this also initializes IO event counters */
+	is_mddev_idle(mddev, 1); /* this initializes IO event counters */
 
 	io_sectors = 0;
 	for (m = 0; m < SYNC_MARKS; m++) {
@@ -6040,6 +6342,18 @@ void md_do_sync(mddev_t *mddev)
 		}
 		if (kthread_should_stop())
 			goto interrupted;
+
+		if (mddev->curr_resync > mddev->curr_resync_completed &&
+		    (mddev->curr_resync - mddev->curr_resync_completed)
+		    > (max_sectors >> 4)) {
+			/* time to update curr_resync_completed */
+			blk_unplug(mddev->queue);
+			wait_event(mddev->recovery_wait,
+				   atomic_read(&mddev->recovery_active) == 0);
+			mddev->curr_resync_completed =
+				mddev->curr_resync;
+			set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+		}
 		sectors = mddev->pers->sync_request(mddev, j, &skipped,
 						  currspeed < speed_min(mddev));
 		if (sectors == 0) {
@@ -6102,7 +6416,7 @@ void md_do_sync(mddev_t *mddev)
 
 		if (currspeed > speed_min(mddev)) {
 			if ((currspeed > speed_max(mddev)) ||
-					!is_mddev_idle(mddev)) {
+					!is_mddev_idle(mddev, 0)) {
 				msleep(500);
 				goto repeat;
 			}
@@ -6173,6 +6487,8 @@ static int remove_and_add_spares(mddev_t *mddev)
 	mdk_rdev_t *rdev;
 	int spares = 0;
 
+	mddev->curr_resync_completed = 0;
+
 	list_for_each_entry(rdev, &mddev->disks, same_set)
 		if (rdev->raid_disk >= 0 &&
 		    !test_bit(Blocked, &rdev->flags) &&
@@ -6327,6 +6643,9 @@ void md_check_recovery(mddev_t *mddev)
 					sysfs_notify(&mddev->kobj, NULL,
 						     "degraded");
 			}
+			if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+			    mddev->pers->finish_reshape)
+				mddev->pers->finish_reshape(mddev);
 			md_update_sb(mddev, 1);
 
 			/* if array is no-longer degraded, then any saved_raid_disk
@@ -6470,13 +6789,13 @@ static void md_geninit(void)
 
 static int __init md_init(void)
 {
-	if (register_blkdev(MAJOR_NR, "md"))
+	if (register_blkdev(MD_MAJOR, "md"))
 		return -1;
 	if ((mdp_major=register_blkdev(0, "mdp"))<=0) {
-		unregister_blkdev(MAJOR_NR, "md");
+		unregister_blkdev(MD_MAJOR, "md");
 		return -1;
 	}
-	blk_register_region(MKDEV(MAJOR_NR, 0), 1UL<<MINORBITS, THIS_MODULE,
+	blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
 			    md_probe, NULL, NULL);
 	blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
 			    md_probe, NULL, NULL);
@@ -6562,10 +6881,10 @@ static __exit void md_exit(void)
 	mddev_t *mddev;
 	struct list_head *tmp;
 
-	blk_unregister_region(MKDEV(MAJOR_NR,0), 1U << MINORBITS);
+	blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS);
 	blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
 
-	unregister_blkdev(MAJOR_NR,"md");
+	unregister_blkdev(MD_MAJOR,"md");
 	unregister_blkdev(mdp_major, "mdp");
 	unregister_reboot_notifier(&md_notifier);
 	unregister_sysctl_table(raid_table_header);
diff --git a/include/linux/raid/md_k.h b/drivers/md/md.h
index 9743e4d..e9b7f54 100644
--- a/include/linux/raid/md_k.h
+++ b/drivers/md/md.h
@@ -15,21 +15,8 @@
 #ifndef _MD_K_H
 #define _MD_K_H
 
-/* and dm-bio-list.h is not under include/linux because.... ??? */
-#include "../../../drivers/md/dm-bio-list.h"
-
 #ifdef CONFIG_BLOCK
 
-#define	LEVEL_MULTIPATH		(-4)
-#define	LEVEL_LINEAR		(-1)
-#define	LEVEL_FAULTY		(-5)
-
-/* we need a value for 'no level specified' and 0
- * means 'raid0', so we need something else.  This is
- * for internal use only
- */
-#define	LEVEL_NONE		(-1000000)
-
 #define MaxSector (~(sector_t)0)
 
 typedef struct mddev_s mddev_t;
@@ -49,9 +36,9 @@ struct mdk_rdev_s
 {
 	struct list_head same_set;	/* RAID devices within the same set */
 
-	sector_t size;			/* Device size (in blocks) */
+	sector_t sectors;		/* Device size (in 512-byte sectors) */
 	mddev_t *mddev;			/* RAID array if running */
-	long last_events;		/* IO event timestamp */
+	int last_events;		/* IO event timestamp */
 
 	struct block_device *bdev;	/* block device handle */
 
@@ -132,6 +119,8 @@ struct mddev_s
 #define MD_CHANGE_CLEAN 1	/* transition to or from 'clean' */
 #define MD_CHANGE_PENDING 2	/* superblock update in progress */
 
+	int				suspended;
+	atomic_t			active_io;
 	int				ro;
 
 	struct gendisk			*gendisk;
@@ -155,8 +144,11 @@ struct mddev_s
 	char				clevel[16];
 	int				raid_disks;
 	int				max_disks;
-	sector_t			size; /* used size of component devices */
+	sector_t			dev_sectors; 	/* used size of
+							 * component devices */
 	sector_t			array_sectors; /* exported array size */
+	int				external_size; /* size managed
+							* externally */
 	__u64				events;
 
 	char				uuid[16];
@@ -172,6 +164,13 @@ struct mddev_s
 	struct mdk_thread_s		*thread;	/* management thread */
 	struct mdk_thread_s		*sync_thread;	/* doing resync or reconstruct */
 	sector_t			curr_resync;	/* last block scheduled */
+	/* As resync requests can complete out of order, we cannot easily track
+	 * how much resync has been completed.  So we occasionally pause until
+	 * everything completes, then set curr_resync_completed to curr_resync.
+	 * As such it may be well behind the real resync mark, but it is a value
+	 * we are certain of.
+	 */
+	sector_t			curr_resync_completed;
 	unsigned long			resync_mark;	/* a recent timestamp */
 	sector_t			resync_mark_cnt;/* blocks written at resync_mark */
 	sector_t			curr_mark_cnt; /* blocks scheduled now */
@@ -315,8 +314,10 @@ struct mdk_personality
 	int (*spare_active) (mddev_t *mddev);
 	sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster);
 	int (*resize) (mddev_t *mddev, sector_t sectors);
+	sector_t (*size) (mddev_t *mddev, sector_t sectors, int raid_disks);
 	int (*check_reshape) (mddev_t *mddev);
 	int (*start_reshape) (mddev_t *mddev);
+	void (*finish_reshape) (mddev_t *mddev);
 	int (*reconfig) (mddev_t *mddev, int layout, int chunk_size);
 	/* quiesce moves between quiescence states
 	 * 0 - fully active
@@ -324,6 +325,16 @@ struct mdk_personality
 	 * others - reserved
 	 */
 	void (*quiesce) (mddev_t *mddev, int state);
+	/* takeover is used to transition an array from one
+	 * personality to another.  The new personality must be able
+	 * to handle the data in the current layout.
+	 * e.g. 2drive raid1 -> 2drive raid5
+	 *      ndrive raid5 -> degraded n+1drive raid6 with special layout
+	 * If the takeover succeeds, a new 'private' structure is returned.
+	 * This needs to be installed and then ->run used to activate the
+	 * array.
+	 */
+	void *(*takeover) (mddev_t *mddev);
 };
 
 
@@ -400,3 +411,26 @@ static inline void safe_put_page(struct page *p)
 #endif /* CONFIG_BLOCK */
 #endif
 
+
+extern int register_md_personality(struct mdk_personality *p);
+extern int unregister_md_personality(struct mdk_personality *p);
+extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev),
+				mddev_t *mddev, const char *name);
+extern void md_unregister_thread(mdk_thread_t *thread);
+extern void md_wakeup_thread(mdk_thread_t *thread);
+extern void md_check_recovery(mddev_t *mddev);
+extern void md_write_start(mddev_t *mddev, struct bio *bi);
+extern void md_write_end(mddev_t *mddev);
+extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
+extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
+
+extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
+			   sector_t sector, int size, struct page *page);
+extern void md_super_wait(mddev_t *mddev);
+extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
+			struct page *page, int rw);
+extern void md_do_sync(mddev_t *mddev);
+extern void md_new_event(mddev_t *mddev);
+extern int md_allow_write(mddev_t *mddev);
+extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
+extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors);
diff --git a/drivers/md/mktables.c b/drivers/md/mktables.c
index b61d576..3b15008 100644
--- a/drivers/md/mktables.c
+++ b/drivers/md/mktables.c
@@ -59,7 +59,7 @@ int main(int argc, char *argv[])
 	uint8_t v;
 	uint8_t exptbl[256], invtbl[256];
 
-	printf("#include \"raid6.h\"\n");
+	printf("#include <linux/raid/pq.h>\n");
 
 	/* Compute multiplication table */
 	printf("\nconst u8  __attribute__((aligned(256)))\n"
@@ -76,6 +76,9 @@ int main(int argc, char *argv[])
 		printf("\t},\n");
 	}
 	printf("};\n");
+	printf("#ifdef __KERNEL__\n");
+	printf("EXPORT_SYMBOL(raid6_gfmul);\n");
+	printf("#endif\n");
 
 	/* Compute power-of-2 table (exponent) */
 	v = 1;
@@ -92,6 +95,9 @@ int main(int argc, char *argv[])
 		}
 	}
 	printf("};\n");
+	printf("#ifdef __KERNEL__\n");
+	printf("EXPORT_SYMBOL(raid6_gfexp);\n");
+	printf("#endif\n");
 
 	/* Compute inverse table x^-1 == x^254 */
 	printf("\nconst u8 __attribute__((aligned(256)))\n"
@@ -104,6 +110,9 @@ int main(int argc, char *argv[])
 		}
 	}
 	printf("};\n");
+	printf("#ifdef __KERNEL__\n");
+	printf("EXPORT_SYMBOL(raid6_gfinv);\n");
+	printf("#endif\n");
 
 	/* Compute inv(2^x + 1) (exponent-xor-inverse) table */
 	printf("\nconst u8 __attribute__((aligned(256)))\n"
@@ -115,6 +124,9 @@ int main(int argc, char *argv[])
 			       (j == 7) ? '\n' : ' ');
 	}
 	printf("};\n");
+	printf("#ifdef __KERNEL__\n");
+	printf("EXPORT_SYMBOL(raid6_gfexi);\n");
+	printf("#endif\n");
 
 	return 0;
 }
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index f6d08f2..41ced0c 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -19,7 +19,11 @@
  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <linux/raid/multipath.h>
+#include <linux/blkdev.h>
+#include <linux/raid/md_u.h>
+#include <linux/seq_file.h>
+#include "md.h"
+#include "multipath.h"
 
 #define MAX_WORK_PER_DISK 128
 
@@ -402,6 +406,14 @@ static void multipathd (mddev_t *mddev)
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 }
 
+static sector_t multipath_size(mddev_t *mddev, sector_t sectors, int raid_disks)
+{
+	WARN_ONCE(sectors || raid_disks,
+		  "%s does not support generic reshape\n", __func__);
+	/* the arguments do not affect the result: always dev_sectors */
+	return mddev->dev_sectors;
+}
+
 static int multipath_run (mddev_t *mddev)
 {
 	multipath_conf_t *conf;
@@ -498,7 +510,7 @@ static int multipath_run (mddev_t *mddev)
 	/*
 	 * Ok, everything is just fine now
 	 */
-	mddev->array_sectors = mddev->size * 2;
+	md_set_array_sectors(mddev, multipath_size(mddev, 0, 0));
 
 	mddev->queue->unplug_fn = multipath_unplug;
 	mddev->queue->backing_dev_info.congested_fn = multipath_congested;
@@ -543,6 +555,7 @@ static struct mdk_personality multipath_personality =
 	.error_handler	= multipath_error,
 	.hot_add_disk	= multipath_add_disk,
 	.hot_remove_disk= multipath_remove_disk,
+	.size		= multipath_size,
 };
 
 static int __init multipath_init (void)
diff --git a/include/linux/raid/multipath.h b/drivers/md/multipath.h
index 6f53fc1..6fa70b4 100644
--- a/include/linux/raid/multipath.h
+++ b/drivers/md/multipath.h
@@ -1,8 +1,6 @@
 #ifndef _MULTIPATH_H
 #define _MULTIPATH_H
 
-#include <linux/raid/md.h>
-
 struct multipath_info {
 	mdk_rdev_t	*rdev;
 };
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index c605ba8..c08d755 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -18,7 +18,10 @@
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  
 */
 
-#include <linux/raid/raid0.h>
+#include <linux/blkdev.h>
+#include <linux/seq_file.h>
+#include "md.h"
+#include "raid0.h"
 
 static void raid0_unplug(struct request_queue *q)
 {
@@ -73,16 +76,15 @@ static int create_strip_zones (mddev_t *mddev)
 		list_for_each_entry(rdev2, &mddev->disks, same_set) {
 			printk(KERN_INFO "raid0:   comparing %s(%llu)",
 			       bdevname(rdev1->bdev,b),
-			       (unsigned long long)rdev1->size);
+			       (unsigned long long)rdev1->sectors);
 			printk(KERN_INFO " with %s(%llu)\n",
 			       bdevname(rdev2->bdev,b),
-			       (unsigned long long)rdev2->size);
+			       (unsigned long long)rdev2->sectors);
 			if (rdev2 == rdev1) {
 				printk(KERN_INFO "raid0:   END\n");
 				break;
 			}
-			if (rdev2->size == rdev1->size)
-			{
+			if (rdev2->sectors == rdev1->sectors) {
 				/*
 				 * Not unique, don't count it as a new
 				 * group
@@ -145,7 +147,7 @@ static int create_strip_zones (mddev_t *mddev)
 		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
 			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
-		if (!smallest || (rdev1->size <smallest->size))
+		if (!smallest || (rdev1->sectors < smallest->sectors))
 			smallest = rdev1;
 		cnt++;
 	}
@@ -155,10 +157,10 @@ static int create_strip_zones (mddev_t *mddev)
 		goto abort;
 	}
 	zone->nb_dev = cnt;
-	zone->sectors = smallest->size * cnt * 2;
+	zone->sectors = smallest->sectors * cnt;
 	zone->zone_start = 0;
 
-	current_start = smallest->size * 2;
+	current_start = smallest->sectors;
 	curr_zone_start = zone->sectors;
 
 	/* now do the other zones */
@@ -177,29 +179,29 @@ static int create_strip_zones (mddev_t *mddev)
 			rdev = conf->strip_zone[0].dev[j];
 			printk(KERN_INFO "raid0: checking %s ...",
 				bdevname(rdev->bdev, b));
-			if (rdev->size > current_start / 2) {
-				printk(KERN_INFO " contained as device %d\n",
-					c);
-				zone->dev[c] = rdev;
-				c++;
-				if (!smallest || (rdev->size <smallest->size)) {
-					smallest = rdev;
-					printk(KERN_INFO "  (%llu) is smallest!.\n",
-						(unsigned long long)rdev->size);
-				}
-			} else
+			if (rdev->sectors <= current_start) {
 				printk(KERN_INFO " nope.\n");
+				continue;
+			}
+			printk(KERN_INFO " contained as device %d\n", c);
+			zone->dev[c] = rdev;
+			c++;
+			if (!smallest || rdev->sectors < smallest->sectors) {
+				smallest = rdev;
+				printk(KERN_INFO "  (%llu) is smallest!.\n",
+					(unsigned long long)rdev->sectors);
+			}
 		}
 
 		zone->nb_dev = c;
-		zone->sectors = (smallest->size * 2 - current_start) * c;
+		zone->sectors = (smallest->sectors - current_start) * c;
 		printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n",
 			zone->nb_dev, (unsigned long long)zone->sectors);
 
 		zone->zone_start = curr_zone_start;
 		curr_zone_start += zone->sectors;
 
-		current_start = smallest->size * 2;
+		current_start = smallest->sectors;
 		printk(KERN_INFO "raid0: current zone start: %llu\n",
 			(unsigned long long)current_start);
 	}
@@ -261,12 +263,25 @@ static int raid0_mergeable_bvec(struct request_queue *q,
 		return max;
 }
 
+static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks)
+{
+	mdk_rdev_t *rdev;
+	sector_t total = 0;
+
+	WARN_ONCE(sectors || raid_disks,
+		  "%s does not support generic reshape\n", __func__);
+
+	/* sum rdev->sectors over every device on mddev->disks */
+	list_for_each_entry(rdev, &mddev->disks, same_set)
+		total += rdev->sectors;
+	return total;
+}
+
 static int raid0_run (mddev_t *mddev)
 {
 	unsigned  cur=0, i=0, nb_zone;
 	s64 sectors;
 	raid0_conf_t *conf;
-	mdk_rdev_t *rdev;
 
 	if (mddev->chunk_size == 0) {
 		printk(KERN_ERR "md/raid0: non-zero chunk size required.\n");
@@ -291,16 +306,14 @@ static int raid0_run (mddev_t *mddev)
 		goto out_free_conf;
 
 	/* calculate array device size */
-	mddev->array_sectors = 0;
-	list_for_each_entry(rdev, &mddev->disks, same_set)
-		mddev->array_sectors += rdev->size * 2;
+	md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
 
 	printk(KERN_INFO "raid0 : md_size is %llu sectors.\n",
 		(unsigned long long)mddev->array_sectors);
 	printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n",
 		(unsigned long long)conf->spacing);
 	{
-		sector_t s = mddev->array_sectors;
+		sector_t s = raid0_size(mddev, 0, 0);
 		sector_t space = conf->spacing;
 		int round;
 		conf->sector_shift = 0;
@@ -509,6 +522,7 @@ static struct mdk_personality raid0_personality=
 	.run		= raid0_run,
 	.stop		= raid0_stop,
 	.status		= raid0_status,
+	.size		= raid0_size,
 };
 
 static int __init raid0_init (void)
diff --git a/include/linux/raid/raid0.h b/drivers/md/raid0.h
index fd42aa8..824b12e 100644
--- a/include/linux/raid/raid0.h
+++ b/drivers/md/raid0.h
@@ -1,8 +1,6 @@
 #ifndef _RAID0_H
 #define _RAID0_H
 
-#include <linux/raid/md.h>
-
 struct strip_zone
 {
 	sector_t zone_start;	/* Zone offset in md_dev (in sectors) */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e246642..b4f4bad 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -31,10 +31,13 @@
  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include "dm-bio-list.h"
 #include <linux/delay.h>
-#include <linux/raid/raid1.h>
-#include <linux/raid/bitmap.h>
+#include <linux/blkdev.h>
+#include <linux/seq_file.h>
+#include "md.h"
+#include "dm-bio-list.h"
+#include "raid1.h"
+#include "bitmap.h"
 
 #define DEBUG 0
 #if DEBUG
@@ -1723,7 +1726,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 			return 0;
 	}
 
-	max_sector = mddev->size << 1;
+	max_sector = mddev->dev_sectors;
 	if (sector_nr >= max_sector) {
 		/* If we aborted, we need to abort the
 		 * sync on the 'current' bitmap chunk (there will
@@ -1919,6 +1922,14 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	return nr_sectors;
 }
 
+static sector_t raid1_size(mddev_t *mddev, sector_t sectors, int raid_disks)
+{
+	/* a non-zero request is returned as-is, otherwise the current
+	 * per-device size is used; raid_disks is not consulted
+	 */
+	return sectors ? sectors : mddev->dev_sectors;
+}
+
 static int run(mddev_t *mddev)
 {
 	conf_t *conf;
@@ -2048,7 +2059,7 @@ static int run(mddev_t *mddev)
 	/*
 	 * Ok, everything is just fine now
 	 */
-	mddev->array_sectors = mddev->size * 2;
+	md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
 
 	mddev->queue->unplug_fn = raid1_unplug;
 	mddev->queue->backing_dev_info.congested_fn = raid1_congested;
@@ -2089,6 +2100,9 @@ static int stop(mddev_t *mddev)
 		/* need to kick something here to make sure I/O goes? */
 	}
 
+	raise_barrier(conf);
+	lower_barrier(conf);
+
 	md_unregister_thread(mddev->thread);
 	mddev->thread = NULL;
 	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
@@ -2110,15 +2124,17 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
 	 * any io in the removed space completes, but it hardly seems
 	 * worth it.
 	 */
-	mddev->array_sectors = sectors;
+	md_set_array_sectors(mddev, raid1_size(mddev, sectors, 0));
+	if (mddev->array_sectors > raid1_size(mddev, sectors, 0))
+		return -EINVAL;
 	set_capacity(mddev->gendisk, mddev->array_sectors);
 	mddev->changed = 1;
-	if (mddev->array_sectors / 2 > mddev->size &&
+	if (sectors > mddev->dev_sectors &&
 	    mddev->recovery_cp == MaxSector) {
-		mddev->recovery_cp = mddev->size << 1;
+		mddev->recovery_cp = mddev->dev_sectors;
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	}
-	mddev->size = mddev->array_sectors / 2;
+	mddev->dev_sectors = sectors;
 	mddev->resync_max_sectors = sectors;
 	return 0;
 }
@@ -2264,6 +2280,7 @@ static struct mdk_personality raid1_personality =
 	.spare_active	= raid1_spare_active,
 	.sync_request	= sync_request,
 	.resize		= raid1_resize,
+	.size		= raid1_size,
 	.check_reshape	= raid1_reshape,
 	.quiesce	= raid1_quiesce,
 };
diff --git a/include/linux/raid/raid1.h b/drivers/md/raid1.h
index 0a9ba7c..1620eea 100644
--- a/include/linux/raid/raid1.h
+++ b/drivers/md/raid1.h
@@ -1,8 +1,6 @@
 #ifndef _RAID1_H
 #define _RAID1_H
 
-#include <linux/raid/md.h>
-
 typedef struct mirror_info mirror_info_t;
 
 struct mirror_info {
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 7301631..e293d92 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -18,10 +18,13 @@
  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include "dm-bio-list.h"
 #include <linux/delay.h>
-#include <linux/raid/raid10.h>
-#include <linux/raid/bitmap.h>
+#include <linux/blkdev.h>
+#include <linux/seq_file.h>
+#include "md.h"
+#include "dm-bio-list.h"
+#include "raid10.h"
+#include "bitmap.h"
 
 /*
  * RAID10 provides a combination of RAID0 and RAID1 functionality.
@@ -1695,7 +1698,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 			return 0;
 
  skipped:
-	max_sector = mddev->size << 1;
+	max_sector = mddev->dev_sectors;
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
 		max_sector = mddev->resync_max_sectors;
 	if (sector_nr >= max_sector) {
@@ -2020,6 +2023,25 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	goto skipped;
 }
 
+/*
+ * Array size in sectors for the given per-device 'sectors' and 'raid_disks';
+ * a zero argument selects the value currently held in mddev.
+ */
+static sector_t
+raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks)
+{
+	conf_t *conf = mddev_to_conf(mddev);
+	int disks = raid_disks ? raid_disks : mddev->raid_disks;
+	sector_t chunks = sectors ? sectors : mddev->dev_sectors;
+
+	chunks >>= conf->chunk_shift;		/* sectors -> chunks */
+	sector_div(chunks, conf->far_copies);
+	chunks *= disks;
+	sector_div(chunks, conf->near_copies);
+
+	return chunks << conf->chunk_shift;
+}
+
 static int run(mddev_t *mddev)
 {
 	conf_t *conf;
@@ -2076,7 +2098,7 @@ static int run(mddev_t *mddev)
 	conf->far_offset = fo;
 	conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1;
 	conf->chunk_shift = ffz(~mddev->chunk_size) - 9;
-	size = mddev->size >> (conf->chunk_shift-1);
+	size = mddev->dev_sectors >> conf->chunk_shift;
 	sector_div(size, fc);
 	size = size * conf->raid_disks;
 	sector_div(size, nc);
@@ -2089,7 +2111,7 @@ static int run(mddev_t *mddev)
 	 */
 	stride += conf->raid_disks - 1;
 	sector_div(stride, conf->raid_disks);
-	mddev->size = stride  << (conf->chunk_shift-1);
+	mddev->dev_sectors = stride << conf->chunk_shift;
 
 	if (fo)
 		stride = 1;
@@ -2171,8 +2193,8 @@ static int run(mddev_t *mddev)
 	/*
 	 * Ok, everything is just fine now
 	 */
-	mddev->array_sectors = size << conf->chunk_shift;
-	mddev->resync_max_sectors = size << conf->chunk_shift;
+	md_set_array_sectors(mddev, raid10_size(mddev, 0, 0));
+	mddev->resync_max_sectors = raid10_size(mddev, 0, 0);
 
 	mddev->queue->unplug_fn = raid10_unplug;
 	mddev->queue->backing_dev_info.congested_fn = raid10_congested;
@@ -2208,6 +2230,9 @@ static int stop(mddev_t *mddev)
 {
 	conf_t *conf = mddev_to_conf(mddev);
 
+	raise_barrier(conf, 0);
+	lower_barrier(conf);
+
 	md_unregister_thread(mddev->thread);
 	mddev->thread = NULL;
 	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
@@ -2255,6 +2280,7 @@ static struct mdk_personality raid10_personality =
 	.spare_active	= raid10_spare_active,
 	.sync_request	= sync_request,
 	.quiesce	= raid10_quiesce,
+	.size		= raid10_size,
 };
 
 static int __init raid_init(void)
diff --git a/include/linux/raid/raid10.h b/drivers/md/raid10.h
index e9091cf..244dbe5 100644
--- a/include/linux/raid/raid10.h
+++ b/drivers/md/raid10.h
@@ -1,8 +1,6 @@
 #ifndef _RAID10_H
 #define _RAID10_H
 
-#include <linux/raid/md.h>
-
 typedef struct mirror_info mirror_info_t;
 
 struct mirror_info {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a5ba080..3bbc6d6 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -43,11 +43,14 @@
  * miss any bits.
  */
 
+#include <linux/blkdev.h>
 #include <linux/kthread.h>
-#include "raid6.h"
-
-#include <linux/raid/bitmap.h>
+#include <linux/raid/pq.h>
 #include <linux/async_tx.h>
+#include <linux/seq_file.h>
+#include "md.h"
+#include "raid5.h"
+#include "bitmap.h"
 
 /*
  * Stripe cache
@@ -91,11 +94,6 @@
 
 #define printk_rl(args...) ((void) (printk_ratelimit() && printk(args)))
 
-#if !RAID6_USE_EMPTY_ZERO_PAGE
-/* In .bss so it's zeroed */
-const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
-#endif
-
 /*
  * We maintain a biased count of active stripes in the bottom 16 bits of
  * bi_phys_segments, and a count of processed stripes in the upper 16 bits
@@ -130,12 +128,42 @@ static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
 	bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16);
 }
 
+/* Return the index of the first data disk in a raid6 stripe */
+static inline int raid6_d0(struct stripe_head *sh)
+{
+	int first;
+
+	/* ddf layouts always start from the first device */
+	if (sh->ddf_layout)
+		return 0;
+	/* md layouts start just after the Q block, wrapping back to 0 */
+	first = sh->qd_idx + 1;
+	return (first == sh->disks) ? 0 : first;
+}
 static inline int raid6_next_disk(int disk, int raid_disks)
 {
 	disk++;
 	return (disk < raid_disks) ? disk : 0;
 }
 
+/* Map disk 'idx' to its 'slot' in the raid6 pointer table while walking a
+ * stripe from raid6_d0.  Data disks take slots 0 .. syndrome_disks-1 in
+ * walk order, the P disk is slot syndrome_disks and the Q disk is slot
+ * syndrome_disks+1.  In a ddf layout syndrome_disks counts every device,
+ * so P and Q must each use up a data slot too (the caller pre-fills it
+ * with the zero page); otherwise *count would stop short of syndrome_disks.
+ */
+static int raid6_idx_to_slot(int idx, struct stripe_head *sh,
+			     int *count, int syndrome_disks)
+{
+	if (idx == sh->pd_idx || idx == sh->qd_idx) {
+		if (sh->ddf_layout)
+			(*count)++;
+		return idx == sh->pd_idx ? syndrome_disks : syndrome_disks + 1;
+	}
+	return (*count)++;
+}
+
 static void return_io(struct bio *return_bi)
 {
 	struct bio *bi = return_bi;
@@ -193,6 +221,7 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
 		}
 	}
 }
+
 static void release_stripe(struct stripe_head *sh)
 {
 	raid5_conf_t *conf = sh->raid_conf;
@@ -270,9 +299,11 @@ static int grow_buffers(struct stripe_head *sh, int num)
 	return 0;
 }
 
-static void raid5_build_block(struct stripe_head *sh, int i);
+static void raid5_build_block(struct stripe_head *sh, int i, int previous);
+static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
+			    struct stripe_head *sh);
 
-static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int disks)
+static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
 {
 	raid5_conf_t *conf = sh->raid_conf;
 	int i;
@@ -287,11 +318,12 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int
 
 	remove_hash(sh);
 
+	sh->generation = conf->generation - previous;
+	sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks;
 	sh->sector = sector;
-	sh->pd_idx = pd_idx;
+	stripe_set_idx(sector, conf, previous, sh);
 	sh->state = 0;
 
-	sh->disks = disks;
 
 	for (i = sh->disks; i--; ) {
 		struct r5dev *dev = &sh->dev[i];
@@ -305,12 +337,13 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int
 			BUG();
 		}
 		dev->flags = 0;
-		raid5_build_block(sh, i);
+		raid5_build_block(sh, i, previous);
 	}
 	insert_hash(conf, sh);
 }
 
-static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, int disks)
+static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector,
+					 short generation)
 {
 	struct stripe_head *sh;
 	struct hlist_node *hn;
@@ -318,7 +351,7 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, in
 	CHECK_DEVLOCK();
 	pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector);
 	hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash)
-		if (sh->sector == sector && sh->disks == disks)
+		if (sh->sector == sector && sh->generation == generation)
 			return sh;
 	pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector);
 	return NULL;
@@ -327,8 +360,9 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, in
 static void unplug_slaves(mddev_t *mddev);
 static void raid5_unplug_device(struct request_queue *q);
 
-static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector, int disks,
-					     int pd_idx, int noblock)
+static struct stripe_head *
+get_active_stripe(raid5_conf_t *conf, sector_t sector,
+		  int previous, int noblock)
 {
 	struct stripe_head *sh;
 
@@ -340,7 +374,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    conf->quiesce == 0,
 				    conf->device_lock, /* nothing */);
-		sh = __find_stripe(conf, sector, disks);
+		sh = __find_stripe(conf, sector, conf->generation - previous);
 		if (!sh) {
 			if (!conf->inactive_blocked)
 				sh = get_free_stripe(conf);
@@ -358,10 +392,11 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
 					);
 				conf->inactive_blocked = 0;
 			} else
-				init_stripe(sh, sector, pd_idx, disks);
+				init_stripe(sh, sector, previous);
 		} else {
 			if (atomic_read(&sh->count)) {
-			  BUG_ON(!list_empty(&sh->lru));
+				BUG_ON(!list_empty(&sh->lru)
+				    && !test_bit(STRIPE_EXPANDING, &sh->state));
 			} else {
 				if (!test_bit(STRIPE_HANDLE, &sh->state))
 					atomic_inc(&conf->active_stripes);
@@ -895,8 +930,10 @@ static int grow_stripes(raid5_conf_t *conf, int num)
 	struct kmem_cache *sc;
 	int devs = conf->raid_disks;
 
-	sprintf(conf->cache_name[0], "raid5-%s", mdname(conf->mddev));
-	sprintf(conf->cache_name[1], "raid5-%s-alt", mdname(conf->mddev));
+	sprintf(conf->cache_name[0],
+		"raid%d-%s", conf->level, mdname(conf->mddev));
+	sprintf(conf->cache_name[1],
+		"raid%d-%s-alt", conf->level, mdname(conf->mddev));
 	conf->active_name = 0;
 	sc = kmem_cache_create(conf->cache_name[conf->active_name],
 			       sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
@@ -911,7 +948,6 @@ static int grow_stripes(raid5_conf_t *conf, int num)
 	return 0;
 }
 
-#ifdef CONFIG_MD_RAID5_RESHAPE
 static int resize_stripes(raid5_conf_t *conf, int newsize)
 {
 	/* Make all the stripes able to hold 'newsize' devices.
@@ -1036,7 +1072,6 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
 	conf->pool_size = newsize;
 	return err;
 }
-#endif
 
 static int drop_one_stripe(raid5_conf_t *conf)
 {
@@ -1066,7 +1101,7 @@ static void shrink_stripes(raid5_conf_t *conf)
 
 static void raid5_end_read_request(struct bio * bi, int error)
 {
- 	struct stripe_head *sh = bi->bi_private;
+	struct stripe_head *sh = bi->bi_private;
 	raid5_conf_t *conf = sh->raid_conf;
 	int disks = sh->disks, i;
 	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -1148,7 +1183,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 
 static void raid5_end_write_request(struct bio *bi, int error)
 {
- 	struct stripe_head *sh = bi->bi_private;
+	struct stripe_head *sh = bi->bi_private;
 	raid5_conf_t *conf = sh->raid_conf;
 	int disks = sh->disks, i;
 	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -1176,9 +1211,9 @@ static void raid5_end_write_request(struct bio *bi, int error)
 }
 
 
-static sector_t compute_blocknr(struct stripe_head *sh, int i);
+static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous);
 	
-static void raid5_build_block(struct stripe_head *sh, int i)
+static void raid5_build_block(struct stripe_head *sh, int i, int previous)
 {
 	struct r5dev *dev = &sh->dev[i];
 
@@ -1194,7 +1229,7 @@ static void raid5_build_block(struct stripe_head *sh, int i)
 	dev->req.bi_private = sh;
 
 	dev->flags = 0;
-	dev->sector = compute_blocknr(sh, i);
+	dev->sector = compute_blocknr(sh, i, previous);
 }
 
 static void error(mddev_t *mddev, mdk_rdev_t *rdev)
@@ -1227,15 +1262,23 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
  * Input: a 'big' sector number,
  * Output: index of the data and parity disk, and the sector # in them.
  */
-static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
-			unsigned int data_disks, unsigned int * dd_idx,
-			unsigned int * pd_idx, raid5_conf_t *conf)
+static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
+				     int previous, int *dd_idx,
+				     struct stripe_head *sh)
 {
 	long stripe;
 	unsigned long chunk_number;
 	unsigned int chunk_offset;
+	int pd_idx, qd_idx;
+	int ddf_layout = 0;
 	sector_t new_sector;
-	int sectors_per_chunk = conf->chunk_size >> 9;
+	int algorithm = previous ? conf->prev_algo
+				 : conf->algorithm;
+	int sectors_per_chunk = previous ? (conf->prev_chunk >> 9)
+					 : (conf->chunk_size >> 9);
+	int raid_disks = previous ? conf->previous_raid_disks
+				  : conf->raid_disks;
+	int data_disks = raid_disks - conf->max_degraded;
 
 	/* First compute the information on this sector */
 
@@ -1259,68 +1302,170 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
 	/*
 	 * Select the parity disk based on the user selected algorithm.
 	 */
+	pd_idx = qd_idx = ~0;
 	switch(conf->level) {
 	case 4:
-		*pd_idx = data_disks;
+		pd_idx = data_disks;
 		break;
 	case 5:
-		switch (conf->algorithm) {
+		switch (algorithm) {
 		case ALGORITHM_LEFT_ASYMMETRIC:
-			*pd_idx = data_disks - stripe % raid_disks;
-			if (*dd_idx >= *pd_idx)
+			pd_idx = data_disks - stripe % raid_disks;
+			if (*dd_idx >= pd_idx)
 				(*dd_idx)++;
 			break;
 		case ALGORITHM_RIGHT_ASYMMETRIC:
-			*pd_idx = stripe % raid_disks;
-			if (*dd_idx >= *pd_idx)
+			pd_idx = stripe % raid_disks;
+			if (*dd_idx >= pd_idx)
 				(*dd_idx)++;
 			break;
 		case ALGORITHM_LEFT_SYMMETRIC:
-			*pd_idx = data_disks - stripe % raid_disks;
-			*dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks;
+			pd_idx = data_disks - stripe % raid_disks;
+			*dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
 			break;
 		case ALGORITHM_RIGHT_SYMMETRIC:
-			*pd_idx = stripe % raid_disks;
-			*dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks;
+			pd_idx = stripe % raid_disks;
+			*dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
+			break;
+		case ALGORITHM_PARITY_0:
+			pd_idx = 0;
+			(*dd_idx)++;
+			break;
+		case ALGORITHM_PARITY_N:
+			pd_idx = data_disks;
 			break;
 		default:
 			printk(KERN_ERR "raid5: unsupported algorithm %d\n",
-				conf->algorithm);
+				algorithm);
+			BUG();
 		}
 		break;
 	case 6:
 
-		/**** FIX THIS ****/
-		switch (conf->algorithm) {
+		switch (algorithm) {
 		case ALGORITHM_LEFT_ASYMMETRIC:
-			*pd_idx = raid_disks - 1 - (stripe % raid_disks);
-			if (*pd_idx == raid_disks-1)
-				(*dd_idx)++; 	/* Q D D D P */
-			else if (*dd_idx >= *pd_idx)
+			pd_idx = raid_disks - 1 - (stripe % raid_disks);
+			qd_idx = pd_idx + 1;
+			if (pd_idx == raid_disks-1) {
+				(*dd_idx)++;	/* Q D D D P */
+				qd_idx = 0;
+			} else if (*dd_idx >= pd_idx)
 				(*dd_idx) += 2; /* D D P Q D */
 			break;
 		case ALGORITHM_RIGHT_ASYMMETRIC:
-			*pd_idx = stripe % raid_disks;
-			if (*pd_idx == raid_disks-1)
-				(*dd_idx)++; 	/* Q D D D P */
-			else if (*dd_idx >= *pd_idx)
+			pd_idx = stripe % raid_disks;
+			qd_idx = pd_idx + 1;
+			if (pd_idx == raid_disks-1) {
+				(*dd_idx)++;	/* Q D D D P */
+				qd_idx = 0;
+			} else if (*dd_idx >= pd_idx)
 				(*dd_idx) += 2; /* D D P Q D */
 			break;
 		case ALGORITHM_LEFT_SYMMETRIC:
-			*pd_idx = raid_disks - 1 - (stripe % raid_disks);
-			*dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks;
+			pd_idx = raid_disks - 1 - (stripe % raid_disks);
+			qd_idx = (pd_idx + 1) % raid_disks;
+			*dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks;
 			break;
 		case ALGORITHM_RIGHT_SYMMETRIC:
-			*pd_idx = stripe % raid_disks;
-			*dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks;
+			pd_idx = stripe % raid_disks;
+			qd_idx = (pd_idx + 1) % raid_disks;
+			*dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks;
+			break;
+
+		case ALGORITHM_PARITY_0:
+			pd_idx = 0;
+			qd_idx = 1;
+			(*dd_idx) += 2;
+			break;
+		case ALGORITHM_PARITY_N:
+			pd_idx = data_disks;
+			qd_idx = data_disks + 1;
 			break;
+
+		case ALGORITHM_ROTATING_ZERO_RESTART:
+			/* Exactly the same as RIGHT_ASYMMETRIC, but order
+			 * of blocks for computing Q is different.
+			 */
+			pd_idx = stripe % raid_disks;
+			qd_idx = pd_idx + 1;
+			if (pd_idx == raid_disks-1) {
+				(*dd_idx)++;	/* Q D D D P */
+				qd_idx = 0;
+			} else if (*dd_idx >= pd_idx)
+				(*dd_idx) += 2; /* D D P Q D */
+			ddf_layout = 1;
+			break;
+
+		case ALGORITHM_ROTATING_N_RESTART:
+			/* Same as left_asymmetric, but first stripe is
+			 * D D D P Q  rather than
+			 * Q D D D P
+			 */
+			pd_idx = raid_disks - 1 - ((stripe + 1) % raid_disks);
+			qd_idx = pd_idx + 1;
+			if (pd_idx == raid_disks-1) {
+				(*dd_idx)++;	/* Q D D D P */
+				qd_idx = 0;
+			} else if (*dd_idx >= pd_idx)
+				(*dd_idx) += 2; /* D D P Q D */
+			ddf_layout = 1;
+			break;
+
+		case ALGORITHM_ROTATING_N_CONTINUE:
+			/* Same as left_symmetric but Q is before P */
+			pd_idx = raid_disks - 1 - (stripe % raid_disks);
+			qd_idx = (pd_idx + raid_disks - 1) % raid_disks;
+			*dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
+			ddf_layout = 1;
+			break;
+
+		case ALGORITHM_LEFT_ASYMMETRIC_6:
+			/* RAID5 left_asymmetric, with Q on last device */
+			pd_idx = data_disks - stripe % (raid_disks-1);
+			if (*dd_idx >= pd_idx)
+				(*dd_idx)++;
+			qd_idx = raid_disks - 1;
+			break;
+
+		case ALGORITHM_RIGHT_ASYMMETRIC_6:
+			pd_idx = stripe % (raid_disks-1);
+			if (*dd_idx >= pd_idx)
+				(*dd_idx)++;
+			qd_idx = raid_disks - 1;
+			break;
+
+		case ALGORITHM_LEFT_SYMMETRIC_6:
+			pd_idx = data_disks - stripe % (raid_disks-1);
+			*dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1);
+			qd_idx = raid_disks - 1;
+			break;
+
+		case ALGORITHM_RIGHT_SYMMETRIC_6:
+			pd_idx = stripe % (raid_disks-1);
+			*dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1);
+			qd_idx = raid_disks - 1;
+			break;
+
+		case ALGORITHM_PARITY_0_6:
+			pd_idx = 0;
+			(*dd_idx)++;
+			qd_idx = raid_disks - 1;
+			break;
+
+
 		default:
 			printk(KERN_CRIT "raid6: unsupported algorithm %d\n",
-			       conf->algorithm);
+			       algorithm);
+			BUG();
 		}
 		break;
 	}
 
+	if (sh) {
+		sh->pd_idx = pd_idx;
+		sh->qd_idx = qd_idx;
+		sh->ddf_layout = ddf_layout;
+	}
 	/*
 	 * Finally, compute the new sector number
 	 */
@@ -1329,17 +1474,21 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
 }
 
 
-static sector_t compute_blocknr(struct stripe_head *sh, int i)
+static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous)
 {
 	raid5_conf_t *conf = sh->raid_conf;
 	int raid_disks = sh->disks;
 	int data_disks = raid_disks - conf->max_degraded;
 	sector_t new_sector = sh->sector, check;
-	int sectors_per_chunk = conf->chunk_size >> 9;
+	int sectors_per_chunk = previous ? (conf->prev_chunk >> 9)
+					 : (conf->chunk_size >> 9);
+	int algorithm = previous ? conf->prev_algo
+				 : conf->algorithm;
 	sector_t stripe;
 	int chunk_offset;
-	int chunk_number, dummy1, dummy2, dd_idx = i;
+	int chunk_number, dummy1, dd_idx = i;
 	sector_t r_sector;
+	struct stripe_head sh2;
 
 
 	chunk_offset = sector_div(new_sector, sectors_per_chunk);
@@ -1351,7 +1500,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
 	switch(conf->level) {
 	case 4: break;
 	case 5:
-		switch (conf->algorithm) {
+		switch (algorithm) {
 		case ALGORITHM_LEFT_ASYMMETRIC:
 		case ALGORITHM_RIGHT_ASYMMETRIC:
 			if (i > sh->pd_idx)
@@ -1363,19 +1512,27 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
 				i += raid_disks;
 			i -= (sh->pd_idx + 1);
 			break;
+		case ALGORITHM_PARITY_0:
+			i -= 1;
+			break;
+		case ALGORITHM_PARITY_N:
+			break;
 		default:
 			printk(KERN_ERR "raid5: unsupported algorithm %d\n",
-			       conf->algorithm);
+			       algorithm);
+			BUG();
 		}
 		break;
 	case 6:
-		if (i == raid6_next_disk(sh->pd_idx, raid_disks))
+		if (i == sh->qd_idx)
 			return 0; /* It is the Q disk */
-		switch (conf->algorithm) {
+		switch (algorithm) {
 		case ALGORITHM_LEFT_ASYMMETRIC:
 		case ALGORITHM_RIGHT_ASYMMETRIC:
-		  	if (sh->pd_idx == raid_disks-1)
-				i--; 	/* Q D D D P */
+		case ALGORITHM_ROTATING_ZERO_RESTART:
+		case ALGORITHM_ROTATING_N_RESTART:
+			if (sh->pd_idx == raid_disks-1)
+				i--;	/* Q D D D P */
 			else if (i > sh->pd_idx)
 				i -= 2; /* D D P Q D */
 			break;
@@ -1390,9 +1547,35 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
 				i -= (sh->pd_idx + 2);
 			}
 			break;
+		case ALGORITHM_PARITY_0:
+			i -= 2;
+			break;
+		case ALGORITHM_PARITY_N:
+			break;
+		case ALGORITHM_ROTATING_N_CONTINUE:
+			if (sh->pd_idx == 0)
+				i--;	/* P D D D Q */
+			else if (i > sh->pd_idx)
+				i -= 2; /* D D Q P D */
+			break;
+		case ALGORITHM_LEFT_ASYMMETRIC_6:
+		case ALGORITHM_RIGHT_ASYMMETRIC_6:
+			if (i > sh->pd_idx)
+				i--;
+			break;
+		case ALGORITHM_LEFT_SYMMETRIC_6:
+		case ALGORITHM_RIGHT_SYMMETRIC_6:
+			if (i < sh->pd_idx)
+				i += data_disks + 1;
+			i -= (sh->pd_idx + 1);
+			break;
+		case ALGORITHM_PARITY_0_6:
+			i -= 1;
+			break;
 		default:
 			printk(KERN_CRIT "raid6: unsupported algorithm %d\n",
-			       conf->algorithm);
+			       algorithm);
+			BUG();
 		}
 		break;
 	}
@@ -1400,8 +1583,10 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
 	chunk_number = stripe * data_disks + i;
 	r_sector = (sector_t)chunk_number * sectors_per_chunk + chunk_offset;
 
-	check = raid5_compute_sector(r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf);
-	if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) {
+	check = raid5_compute_sector(conf, r_sector,
+				     previous, &dummy1, &sh2);
+	if (check != sh->sector || dummy1 != dd_idx || sh2.pd_idx != sh->pd_idx
+		|| sh2.qd_idx != sh->qd_idx) {
 		printk(KERN_ERR "compute_blocknr: map not correct\n");
 		return 0;
 	}
@@ -1468,14 +1653,16 @@ static void copy_data(int frombio, struct bio *bio,
 
 static void compute_parity6(struct stripe_head *sh, int method)
 {
-	raid6_conf_t *conf = sh->raid_conf;
-	int i, pd_idx = sh->pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
+	raid5_conf_t *conf = sh->raid_conf;
+	int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
+	int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
 	struct bio *chosen;
 	/**** FIX THIS: This could be very bad if disks is close to 256 ****/
-	void *ptrs[disks];
+	void *ptrs[syndrome_disks+2];
 
-	qd_idx = raid6_next_disk(pd_idx, disks);
-	d0_idx = raid6_next_disk(qd_idx, disks);
+	pd_idx = sh->pd_idx;
+	qd_idx = sh->qd_idx;
+	d0_idx = raid6_d0(sh);
 
 	pr_debug("compute_parity, stripe %llu, method %d\n",
 		(unsigned long long)sh->sector, method);
@@ -1513,24 +1700,29 @@ static void compute_parity6(struct stripe_head *sh, int method)
 			set_bit(R5_UPTODATE, &sh->dev[i].flags);
 		}
 
-//	switch(method) {
-//	case RECONSTRUCT_WRITE:
-//	case CHECK_PARITY:
-//	case UPDATE_PARITY:
-		/* Note that unlike RAID-5, the ordering of the disks matters greatly. */
-		/* FIX: Is this ordering of drives even remotely optimal? */
-		count = 0;
-		i = d0_idx;
-		do {
-			ptrs[count++] = page_address(sh->dev[i].page);
-			if (count <= disks-2 && !test_bit(R5_UPTODATE, &sh->dev[i].flags))
-				printk("block %d/%d not uptodate on parity calc\n", i,count);
-			i = raid6_next_disk(i, disks);
-		} while ( i != d0_idx );
-//		break;
-//	}
-
-	raid6_call.gen_syndrome(disks, STRIPE_SIZE, ptrs);
+	/* Note that unlike RAID-5, the ordering of the disks matters greatly.*/
+
+	for (i = 0; i < disks; i++)
+		ptrs[i] = (void *)raid6_empty_zero_page;
+
+	count = 0;
+	i = d0_idx;
+	do {
+		int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+
+		ptrs[slot] = page_address(sh->dev[i].page);
+		if (slot < syndrome_disks &&
+		    !test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
+			printk(KERN_ERR "block %d/%d not uptodate "
+			       "on parity calc\n", i, count);
+			BUG();
+		}
+
+		i = raid6_next_disk(i, disks);
+	} while (i != d0_idx);
+	BUG_ON(count != syndrome_disks);
+
+	raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs);
 
 	switch(method) {
 	case RECONSTRUCT_WRITE:
@@ -1552,8 +1744,7 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
 {
 	int i, count, disks = sh->disks;
 	void *ptr[MAX_XOR_BLOCKS], *dest, *p;
-	int pd_idx = sh->pd_idx;
-	int qd_idx = raid6_next_disk(pd_idx, disks);
+	int qd_idx = sh->qd_idx;
 
 	pr_debug("compute_block_1, stripe %llu, idx %d\n",
 		(unsigned long long)sh->sector, dd_idx);
@@ -1589,63 +1780,65 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
 static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
 {
 	int i, count, disks = sh->disks;
-	int pd_idx = sh->pd_idx;
-	int qd_idx = raid6_next_disk(pd_idx, disks);
-	int d0_idx = raid6_next_disk(qd_idx, disks);
-	int faila, failb;
+	int syndrome_disks = sh->ddf_layout ? disks : disks-2;
+	int d0_idx = raid6_d0(sh);
+	int faila = -1, failb = -1;
+	/**** FIX THIS: This could be very bad if disks is close to 256 ****/
+	void *ptrs[syndrome_disks+2];
 
-	/* faila and failb are disk numbers relative to d0_idx */
-	/* pd_idx become disks-2 and qd_idx become disks-1 */
-	faila = (dd_idx1 < d0_idx) ? dd_idx1+(disks-d0_idx) : dd_idx1-d0_idx;
-	failb = (dd_idx2 < d0_idx) ? dd_idx2+(disks-d0_idx) : dd_idx2-d0_idx;
+	for (i = 0; i < disks ; i++)
+		ptrs[i] = (void *)raid6_empty_zero_page;
+	count = 0;
+	i = d0_idx;
+	do {
+		int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+
+		ptrs[slot] = page_address(sh->dev[i].page);
+
+		if (i == dd_idx1)
+			faila = slot;
+		if (i == dd_idx2)
+			failb = slot;
+		i = raid6_next_disk(i, disks);
+	} while (i != d0_idx);
+	BUG_ON(count != syndrome_disks);
 
 	BUG_ON(faila == failb);
 	if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
 
 	pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
-	       (unsigned long long)sh->sector, dd_idx1, dd_idx2, faila, failb);
+		 (unsigned long long)sh->sector, dd_idx1, dd_idx2,
+		 faila, failb);
 
-	if ( failb == disks-1 ) {
+	if (failb == syndrome_disks+1) {
 		/* Q disk is one of the missing disks */
-		if ( faila == disks-2 ) {
+		if (faila == syndrome_disks) {
 			/* Missing P+Q, just recompute */
 			compute_parity6(sh, UPDATE_PARITY);
 			return;
 		} else {
 			/* We're missing D+Q; recompute D from P */
-			compute_block_1(sh, (dd_idx1 == qd_idx) ? dd_idx2 : dd_idx1, 0);
+			compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ?
+					     dd_idx2 : dd_idx1),
+					0);
 			compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */
 			return;
 		}
 	}
 
-	/* We're missing D+P or D+D; build pointer table */
-	{
-		/**** FIX THIS: This could be very bad if disks is close to 256 ****/
-		void *ptrs[disks];
-
-		count = 0;
-		i = d0_idx;
-		do {
-			ptrs[count++] = page_address(sh->dev[i].page);
-			i = raid6_next_disk(i, disks);
-			if (i != dd_idx1 && i != dd_idx2 &&
-			    !test_bit(R5_UPTODATE, &sh->dev[i].flags))
-				printk("compute_2 with missing block %d/%d\n", count, i);
-		} while ( i != d0_idx );
-
-		if ( failb == disks-2 ) {
-			/* We're missing D+P. */
-			raid6_datap_recov(disks, STRIPE_SIZE, faila, ptrs);
-		} else {
-			/* We're missing D+D. */
-			raid6_2data_recov(disks, STRIPE_SIZE, faila, failb, ptrs);
-		}
-
-		/* Both the above update both missing blocks */
-		set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
-		set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
+	/* We're missing D+P or D+D; */
+	if (failb == syndrome_disks) {
+		/* We're missing D+P. */
+		raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs);
+	} else {
+		/* We're missing D+D. */
+		raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb,
+				  ptrs);
 	}
+
+	/* Both the above update both missing blocks */
+	set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
+	set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
 }
 
 static void
@@ -1800,17 +1993,21 @@ static int page_is_zero(struct page *p)
 		memcmp(a, a+4, STRIPE_SIZE-4)==0);
 }
 
-static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int disks)
+static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
+			    struct stripe_head *sh)
 {
-	int sectors_per_chunk = conf->chunk_size >> 9;
-	int pd_idx, dd_idx;
+	int sectors_per_chunk =
+		previous ? (conf->prev_chunk >> 9)
+			 : (conf->chunk_size >> 9);
+	int dd_idx;
 	int chunk_offset = sector_div(stripe, sectors_per_chunk);
+	int disks = previous ? conf->previous_raid_disks : conf->raid_disks;
 
-	raid5_compute_sector(stripe * (disks - conf->max_degraded)
+	raid5_compute_sector(conf,
+			     stripe * (disks - conf->max_degraded)
 			     *sectors_per_chunk + chunk_offset,
-			     disks, disks - conf->max_degraded,
-			     &dd_idx, &pd_idx, conf);
-	return pd_idx;
+			     previous,
+			     &dd_idx, sh);
 }
 
 static void
@@ -2181,7 +2378,7 @@ static void handle_stripe_dirtying6(raid5_conf_t *conf,
 		struct r6_state *r6s, int disks)
 {
 	int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i;
-	int qd_idx = r6s->qd_idx;
+	int qd_idx = sh->qd_idx;
 	for (i = disks; i--; ) {
 		struct r5dev *dev = &sh->dev[i];
 		/* Would I have to read this buffer for reconstruct_write */
@@ -2371,7 +2568,7 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
 	int update_p = 0, update_q = 0;
 	struct r5dev *dev;
 	int pd_idx = sh->pd_idx;
-	int qd_idx = r6s->qd_idx;
+	int qd_idx = sh->qd_idx;
 
 	set_bit(STRIPE_HANDLE, &sh->state);
 
@@ -2467,17 +2664,14 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
 	struct dma_async_tx_descriptor *tx = NULL;
 	clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
 	for (i = 0; i < sh->disks; i++)
-		if (i != sh->pd_idx && (!r6s || i != r6s->qd_idx)) {
-			int dd_idx, pd_idx, j;
+		if (i != sh->pd_idx && i != sh->qd_idx) {
+			int dd_idx, j;
 			struct stripe_head *sh2;
 
-			sector_t bn = compute_blocknr(sh, i);
-			sector_t s = raid5_compute_sector(bn, conf->raid_disks,
-						conf->raid_disks -
-						conf->max_degraded, &dd_idx,
-						&pd_idx, conf);
-			sh2 = get_active_stripe(conf, s, conf->raid_disks,
-						pd_idx, 1);
+			sector_t bn = compute_blocknr(sh, i, 1);
+			sector_t s = raid5_compute_sector(conf, bn, 0,
+							  &dd_idx, NULL);
+			sh2 = get_active_stripe(conf, s, 0, 1);
 			if (sh2 == NULL)
 				/* so far only the early blocks of this stripe
 				 * have been requested.  When later blocks
@@ -2500,8 +2694,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
 			set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
 			for (j = 0; j < conf->raid_disks; j++)
 				if (j != sh2->pd_idx &&
-				    (!r6s || j != raid6_next_disk(sh2->pd_idx,
-								 sh2->disks)) &&
+				    (!r6s || j != sh2->qd_idx) &&
 				    !test_bit(R5_Expanded, &sh2->dev[j].flags))
 					break;
 			if (j == conf->raid_disks) {
@@ -2750,6 +2943,23 @@ static bool handle_stripe5(struct stripe_head *sh)
 
 	/* Finish reconstruct operations initiated by the expansion process */
 	if (sh->reconstruct_state == reconstruct_state_result) {
+		struct stripe_head *sh2
+			= get_active_stripe(conf, sh->sector, 1, 1);
+		if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
+			/* sh cannot be written until sh2 has been read.
+			 * so arrange for sh to be delayed a little
+			 */
+			set_bit(STRIPE_DELAYED, &sh->state);
+			set_bit(STRIPE_HANDLE, &sh->state);
+			if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
+					      &sh2->state))
+				atomic_inc(&conf->preread_active_stripes);
+			release_stripe(sh2);
+			goto unlock;
+		}
+		if (sh2)
+			release_stripe(sh2);
+
 		sh->reconstruct_state = reconstruct_state_idle;
 		clear_bit(STRIPE_EXPANDING, &sh->state);
 		for (i = conf->raid_disks; i--; ) {
@@ -2763,8 +2973,7 @@ static bool handle_stripe5(struct stripe_head *sh)
 	    !sh->reconstruct_state) {
 		/* Need to write out all blocks after computing parity */
 		sh->disks = conf->raid_disks;
-		sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
-			conf->raid_disks);
+		stripe_set_idx(sh->sector, conf, 0, sh);
 		schedule_reconstruction5(sh, &s, 1, 1);
 	} else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
 		clear_bit(STRIPE_EXPAND_READY, &sh->state);
@@ -2796,20 +3005,19 @@ static bool handle_stripe5(struct stripe_head *sh)
 
 static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 {
-	raid6_conf_t *conf = sh->raid_conf;
+	raid5_conf_t *conf = sh->raid_conf;
 	int disks = sh->disks;
 	struct bio *return_bi = NULL;
-	int i, pd_idx = sh->pd_idx;
+	int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx;
 	struct stripe_head_state s;
 	struct r6_state r6s;
 	struct r5dev *dev, *pdev, *qdev;
 	mdk_rdev_t *blocked_rdev = NULL;
 
-	r6s.qd_idx = raid6_next_disk(pd_idx, disks);
 	pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
 		"pd_idx=%d, qd_idx=%d\n",
 	       (unsigned long long)sh->sector, sh->state,
-	       atomic_read(&sh->count), pd_idx, r6s.qd_idx);
+	       atomic_read(&sh->count), pd_idx, qd_idx);
 	memset(&s, 0, sizeof(s));
 
 	spin_lock(&sh->lock);
@@ -2920,9 +3128,9 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	pdev = &sh->dev[pd_idx];
 	r6s.p_failed = (s.failed >= 1 && r6s.failed_num[0] == pd_idx)
 		|| (s.failed >= 2 && r6s.failed_num[1] == pd_idx);
-	qdev = &sh->dev[r6s.qd_idx];
-	r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == r6s.qd_idx)
-		|| (s.failed >= 2 && r6s.failed_num[1] == r6s.qd_idx);
+	qdev = &sh->dev[qd_idx];
+	r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == qd_idx)
+		|| (s.failed >= 2 && r6s.failed_num[1] == qd_idx);
 
 	if ( s.written &&
 	     ( r6s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
@@ -2980,10 +3188,26 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 		}
 
 	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+		struct stripe_head *sh2
+			= get_active_stripe(conf, sh->sector, 1, 1);
+		if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
+			/* sh cannot be written until sh2 has been read,
+			 * so arrange for sh to be delayed a little
+			 */
+			set_bit(STRIPE_DELAYED, &sh->state);
+			set_bit(STRIPE_HANDLE, &sh->state);
+			if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
+					      &sh2->state))
+				atomic_inc(&conf->preread_active_stripes);
+			release_stripe(sh2);
+			goto unlock;
+		}
+		if (sh2)
+			release_stripe(sh2);
+
 		/* Need to write out all blocks after computing P&Q */
 		sh->disks = conf->raid_disks;
-		sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
-					     conf->raid_disks);
+		stripe_set_idx(sh->sector, conf, 0, sh);
 		compute_parity6(sh, RECONSTRUCT_WRITE);
 		for (i = conf->raid_disks ; i-- ;  ) {
 			set_bit(R5_LOCKED, &sh->dev[i].flags);
@@ -3134,6 +3358,8 @@ static int raid5_mergeable_bvec(struct request_queue *q,
 	if ((bvm->bi_rw & 1) == WRITE)
 		return biovec->bv_len; /* always allow writes to be mergeable */
 
+	if (mddev->new_chunk < mddev->chunk_size)
+		chunk_sectors = mddev->new_chunk >> 9;
 	max =  (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
 	if (max < 0) max = 0;
 	if (max <= biovec->bv_len && bio_sectors == 0)
@@ -3149,6 +3375,8 @@ static int in_chunk_boundary(mddev_t *mddev, struct bio *bio)
 	unsigned int chunk_sectors = mddev->chunk_size >> 9;
 	unsigned int bio_sectors = bio->bi_size >> 9;
 
+	if (mddev->new_chunk < mddev->chunk_size)
+		chunk_sectors = mddev->new_chunk >> 9;
 	return  chunk_sectors >=
 		((sector & (chunk_sectors - 1)) + bio_sectors);
 }
@@ -3255,9 +3483,7 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
 {
 	mddev_t *mddev = q->queuedata;
 	raid5_conf_t *conf = mddev_to_conf(mddev);
-	const unsigned int raid_disks = conf->raid_disks;
-	const unsigned int data_disks = raid_disks - conf->max_degraded;
-	unsigned int dd_idx, pd_idx;
+	unsigned int dd_idx;
 	struct bio* align_bi;
 	mdk_rdev_t *rdev;
 
@@ -3266,7 +3492,7 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
 		return 0;
 	}
 	/*
- 	 * use bio_clone to make a copy of the bio
+	 * use bio_clone to make a copy of the bio
 	 */
 	align_bi = bio_clone(raid_bio, GFP_NOIO);
 	if (!align_bi)
@@ -3280,12 +3506,9 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
 	/*
 	 *	compute position
 	 */
-	align_bi->bi_sector =  raid5_compute_sector(raid_bio->bi_sector,
-					raid_disks,
-					data_disks,
-					&dd_idx,
-					&pd_idx,
-					conf);
+	align_bi->bi_sector =  raid5_compute_sector(conf, raid_bio->bi_sector,
+						    0,
+						    &dd_idx, NULL);
 
 	rcu_read_lock();
 	rdev = rcu_dereference(conf->disks[dd_idx].rdev);
@@ -3377,7 +3600,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
 {
 	mddev_t *mddev = q->queuedata;
 	raid5_conf_t *conf = mddev_to_conf(mddev);
-	unsigned int dd_idx, pd_idx;
+	int dd_idx;
 	sector_t new_sector;
 	sector_t logical_sector, last_sector;
 	struct stripe_head *sh;
@@ -3400,7 +3623,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
 	if (rw == READ &&
 	     mddev->reshape_position == MaxSector &&
 	     chunk_aligned_read(q,bi))
-            	return 0;
+		return 0;
 
 	logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
 	last_sector = bi->bi_sector + (bi->bi_size>>9);
@@ -3410,26 +3633,31 @@ static int make_request(struct request_queue *q, struct bio * bi)
 	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
 		DEFINE_WAIT(w);
 		int disks, data_disks;
+		int previous;
 
 	retry:
+		previous = 0;
+		disks = conf->raid_disks;
 		prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
-		if (likely(conf->expand_progress == MaxSector))
-			disks = conf->raid_disks;
-		else {
-			/* spinlock is needed as expand_progress may be
+		if (unlikely(conf->reshape_progress != MaxSector)) {
+			/* spinlock is needed as reshape_progress may be
 			 * 64bit on a 32bit platform, and so it might be
 			 * possible to see a half-updated value
-			 * Ofcourse expand_progress could change after
+			 * Of course reshape_progress could change after
 			 * the lock is dropped, so once we get a reference
 			 * to the stripe that we think it is, we will have
 			 * to check again.
 			 */
 			spin_lock_irq(&conf->device_lock);
-			disks = conf->raid_disks;
-			if (logical_sector >= conf->expand_progress)
+			if (mddev->delta_disks < 0
+			    ? logical_sector < conf->reshape_progress
+			    : logical_sector >= conf->reshape_progress) {
 				disks = conf->previous_raid_disks;
-			else {
-				if (logical_sector >= conf->expand_lo) {
+				previous = 1;
+			} else {
+				if (mddev->delta_disks < 0
+				    ? logical_sector < conf->reshape_safe
+				    : logical_sector >= conf->reshape_safe) {
 					spin_unlock_irq(&conf->device_lock);
 					schedule();
 					goto retry;
@@ -3439,15 +3667,17 @@ static int make_request(struct request_queue *q, struct bio * bi)
 		}
 		data_disks = disks - conf->max_degraded;
 
- 		new_sector = raid5_compute_sector(logical_sector, disks, data_disks,
-						  &dd_idx, &pd_idx, conf);
+		new_sector = raid5_compute_sector(conf, logical_sector,
+						  previous,
+						  &dd_idx, NULL);
 		pr_debug("raid5: make_request, sector %llu logical %llu\n",
 			(unsigned long long)new_sector, 
 			(unsigned long long)logical_sector);
 
-		sh = get_active_stripe(conf, new_sector, disks, pd_idx, (bi->bi_rw&RWA_MASK));
+		sh = get_active_stripe(conf, new_sector, previous,
+				       (bi->bi_rw&RWA_MASK));
 		if (sh) {
-			if (unlikely(conf->expand_progress != MaxSector)) {
+			if (unlikely(previous)) {
 				/* expansion might have moved on while waiting for a
 				 * stripe, so we must do the range check again.
 				 * Expansion could still move past after this
@@ -3458,8 +3688,9 @@ static int make_request(struct request_queue *q, struct bio * bi)
 				 */
 				int must_retry = 0;
 				spin_lock_irq(&conf->device_lock);
-				if (logical_sector <  conf->expand_progress &&
-				    disks == conf->previous_raid_disks)
+				if (mddev->delta_disks < 0
+				    ? logical_sector >= conf->reshape_progress
+				    : logical_sector < conf->reshape_progress)
 					/* mismatch, need to try again */
 					must_retry = 1;
 				spin_unlock_irq(&conf->device_lock);
@@ -3514,6 +3745,8 @@ static int make_request(struct request_queue *q, struct bio * bi)
 	return 0;
 }
 
+static sector_t raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks);
+
 static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped)
 {
 	/* reshaping is quite different to recovery/resync so it is
@@ -3527,61 +3760,118 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
 	 */
 	raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
 	struct stripe_head *sh;
-	int pd_idx;
 	sector_t first_sector, last_sector;
 	int raid_disks = conf->previous_raid_disks;
 	int data_disks = raid_disks - conf->max_degraded;
 	int new_data_disks = conf->raid_disks - conf->max_degraded;
 	int i;
 	int dd_idx;
-	sector_t writepos, safepos, gap;
-
-	if (sector_nr == 0 &&
-	    conf->expand_progress != 0) {
-		/* restarting in the middle, skip the initial sectors */
-		sector_nr = conf->expand_progress;
+	sector_t writepos, readpos, safepos;
+	sector_t stripe_addr;
+	int reshape_sectors;
+	struct list_head stripes;
+
+	if (sector_nr == 0) {
+		/* If restarting in the middle, skip the initial sectors */
+		if (mddev->delta_disks < 0 &&
+		    conf->reshape_progress < raid5_size(mddev, 0, 0)) {
+			sector_nr = raid5_size(mddev, 0, 0)
+				- conf->reshape_progress;
+		} else if (mddev->delta_disks > 0 &&
+			   conf->reshape_progress > 0)
+			sector_nr = conf->reshape_progress;
 		sector_div(sector_nr, new_data_disks);
-		*skipped = 1;
-		return sector_nr;
+		if (sector_nr) {
+			*skipped = 1;
+			return sector_nr;
+		}
 	}
 
+	/* We need to process a full chunk at a time.
+	 * If old and new chunk sizes differ, we need to process the
+	 * larger of the two
+	 */
+	if (mddev->new_chunk > mddev->chunk_size)
+		reshape_sectors = mddev->new_chunk / 512;
+	else
+		reshape_sectors = mddev->chunk_size / 512;
+
 	/* we update the metadata when there is more than 3Meg
 	 * in the block range (that is rather arbitrary, should
 	 * probably be time based) or when the data about to be
 	 * copied would over-write the source of the data at
 	 * the front of the range.
-	 * i.e. one new_stripe forward from expand_progress new_maps
-	 * to after where expand_lo old_maps to
+	 * i.e. one new_stripe along from reshape_progress new_maps
+	 * to after where reshape_safe old_maps to
 	 */
-	writepos = conf->expand_progress +
-		conf->chunk_size/512*(new_data_disks);
+	writepos = conf->reshape_progress;
 	sector_div(writepos, new_data_disks);
-	safepos = conf->expand_lo;
+	readpos = conf->reshape_progress;
+	sector_div(readpos, data_disks);
+	safepos = conf->reshape_safe;
 	sector_div(safepos, data_disks);
-	gap = conf->expand_progress - conf->expand_lo;
+	if (mddev->delta_disks < 0) {
+		writepos -= reshape_sectors;
+		readpos += reshape_sectors;
+		safepos += reshape_sectors;
+	} else {
+		writepos += reshape_sectors;
+		readpos -= reshape_sectors;
+		safepos -= reshape_sectors;
+	}
 
-	if (writepos >= safepos ||
-	    gap > (new_data_disks)*3000*2 /*3Meg*/) {
+	/* 'writepos' is the most advanced device address we might write.
+	 * 'readpos' is the least advanced device address we might read.
+	 * 'safepos' is the least address recorded in the metadata as having
+	 *     been reshaped.
+	 * If 'readpos' is behind 'writepos', then there is no way that we can
+	 * ensure safety in the face of a crash - that must be done by userspace
+	 * making a backup of the data.  So in that case there is no particular
+	 * rush to update metadata.
+	 * Otherwise if 'safepos' is behind 'writepos', then we really need to
+	 * update the metadata to advance 'safepos' to match 'readpos' so that
+	 * we can be safe in the event of a crash.
+	 * So we insist on updating metadata if safepos is behind writepos and
+	 * readpos is beyond writepos.
+	 * In any case, update the metadata every 10 seconds.
+	 * Maybe that number should be configurable, but I'm not sure it is
+	 * worth it.... maybe it could be a multiple of safemode_delay???
+	 */
+	if ((mddev->delta_disks < 0
+	     ? (safepos > writepos && readpos < writepos)
+	     : (safepos < writepos && readpos > writepos)) ||
+	    time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
 		/* Cannot proceed until we've updated the superblock... */
 		wait_event(conf->wait_for_overlap,
 			   atomic_read(&conf->reshape_stripes)==0);
-		mddev->reshape_position = conf->expand_progress;
+		mddev->reshape_position = conf->reshape_progress;
+		conf->reshape_checkpoint = jiffies;
 		set_bit(MD_CHANGE_DEVS, &mddev->flags);
 		md_wakeup_thread(mddev->thread);
 		wait_event(mddev->sb_wait, mddev->flags == 0 ||
 			   kthread_should_stop());
 		spin_lock_irq(&conf->device_lock);
-		conf->expand_lo = mddev->reshape_position;
+		conf->reshape_safe = mddev->reshape_position;
 		spin_unlock_irq(&conf->device_lock);
 		wake_up(&conf->wait_for_overlap);
 	}
 
-	for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) {
+	if (mddev->delta_disks < 0) {
+		BUG_ON(conf->reshape_progress == 0);
+		stripe_addr = writepos;
+		BUG_ON((mddev->dev_sectors &
+			~((sector_t)reshape_sectors - 1))
+		       - reshape_sectors - stripe_addr
+		       != sector_nr);
+	} else {
+		BUG_ON(writepos != sector_nr + reshape_sectors);
+		stripe_addr = sector_nr;
+	}
+	INIT_LIST_HEAD(&stripes);
+	for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
 		int j;
 		int skipped = 0;
-		pd_idx = stripe_to_pdidx(sector_nr+i, conf, conf->raid_disks);
-		sh = get_active_stripe(conf, sector_nr+i,
-				       conf->raid_disks, pd_idx, 0);
+		sh = get_active_stripe(conf, stripe_addr+i, 0, 0);
 		set_bit(STRIPE_EXPANDING, &sh->state);
 		atomic_inc(&conf->reshape_stripes);
 		/* If any of this stripe is beyond the end of the old
@@ -3592,10 +3882,10 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
 			if (j == sh->pd_idx)
 				continue;
 			if (conf->level == 6 &&
-			    j == raid6_next_disk(sh->pd_idx, sh->disks))
+			    j == sh->qd_idx)
 				continue;
-			s = compute_blocknr(sh, j);
-			if (s < mddev->array_sectors) {
+			s = compute_blocknr(sh, j, 0);
+			if (s < raid5_size(mddev, 0, 0)) {
 				skipped = 1;
 				continue;
 			}
@@ -3607,10 +3897,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
 			set_bit(STRIPE_EXPAND_READY, &sh->state);
 			set_bit(STRIPE_HANDLE, &sh->state);
 		}
-		release_stripe(sh);
+		list_add(&sh->lru, &stripes);
 	}
 	spin_lock_irq(&conf->device_lock);
-	conf->expand_progress = (sector_nr + i) * new_data_disks;
+	if (mddev->delta_disks < 0)
+		conf->reshape_progress -= reshape_sectors * new_data_disks;
+	else
+		conf->reshape_progress += reshape_sectors * new_data_disks;
 	spin_unlock_irq(&conf->device_lock);
 	/* Ok, those stripe are ready. We can start scheduling
 	 * reads on the source stripes.
@@ -3618,46 +3911,50 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
 	 * block on the destination stripes.
 	 */
 	first_sector =
-		raid5_compute_sector(sector_nr*(new_data_disks),
-				     raid_disks, data_disks,
-				     &dd_idx, &pd_idx, conf);
+		raid5_compute_sector(conf, stripe_addr*(new_data_disks),
+				     1, &dd_idx, NULL);
 	last_sector =
-		raid5_compute_sector((sector_nr+conf->chunk_size/512)
-				     *(new_data_disks) -1,
-				     raid_disks, data_disks,
-				     &dd_idx, &pd_idx, conf);
-	if (last_sector >= (mddev->size<<1))
-		last_sector = (mddev->size<<1)-1;
+		raid5_compute_sector(conf, ((stripe_addr+conf->chunk_size/512)
+					    *(new_data_disks) - 1),
+				     1, &dd_idx, NULL);
+	if (last_sector >= mddev->dev_sectors)
+		last_sector = mddev->dev_sectors - 1;
 	while (first_sector <= last_sector) {
-		pd_idx = stripe_to_pdidx(first_sector, conf,
-					 conf->previous_raid_disks);
-		sh = get_active_stripe(conf, first_sector,
-				       conf->previous_raid_disks, pd_idx, 0);
+		sh = get_active_stripe(conf, first_sector, 1, 0);
 		set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
 		set_bit(STRIPE_HANDLE, &sh->state);
 		release_stripe(sh);
 		first_sector += STRIPE_SECTORS;
 	}
+	/* Now that the sources are clearly marked, we can release
+	 * the destination stripes
+	 */
+	while (!list_empty(&stripes)) {
+		sh = list_entry(stripes.next, struct stripe_head, lru);
+		list_del_init(&sh->lru);
+		release_stripe(sh);
+	}
 	/* If this takes us to the resync_max point where we have to pause,
 	 * then we need to write out the superblock.
 	 */
-	sector_nr += conf->chunk_size>>9;
+	sector_nr += reshape_sectors;
 	if (sector_nr >= mddev->resync_max) {
 		/* Cannot proceed until we've updated the superblock... */
 		wait_event(conf->wait_for_overlap,
 			   atomic_read(&conf->reshape_stripes) == 0);
-		mddev->reshape_position = conf->expand_progress;
+		mddev->reshape_position = conf->reshape_progress;
+		conf->reshape_checkpoint = jiffies;
 		set_bit(MD_CHANGE_DEVS, &mddev->flags);
 		md_wakeup_thread(mddev->thread);
 		wait_event(mddev->sb_wait,
 			   !test_bit(MD_CHANGE_DEVS, &mddev->flags)
 			   || kthread_should_stop());
 		spin_lock_irq(&conf->device_lock);
-		conf->expand_lo = mddev->reshape_position;
+		conf->reshape_safe = mddev->reshape_position;
 		spin_unlock_irq(&conf->device_lock);
 		wake_up(&conf->wait_for_overlap);
 	}
-	return conf->chunk_size>>9;
+	return reshape_sectors;
 }
 
 /* FIXME go_faster isn't used */
@@ -3665,9 +3962,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
 {
 	raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
 	struct stripe_head *sh;
-	int pd_idx;
-	int raid_disks = conf->raid_disks;
-	sector_t max_sector = mddev->size << 1;
+	sector_t max_sector = mddev->dev_sectors;
 	int sync_blocks;
 	int still_degraded = 0;
 	int i;
@@ -3675,6 +3970,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
 	if (sector_nr >= max_sector) {
 		/* just being told to finish up .. nothing much to do */
 		unplug_slaves(mddev);
+
 		if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
 			end_reshape(conf);
 			return 0;
@@ -3705,7 +4001,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
 	 */
 	if (mddev->degraded >= conf->max_degraded &&
 	    test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
-		sector_t rv = (mddev->size << 1) - sector_nr;
+		sector_t rv = mddev->dev_sectors - sector_nr;
 		*skipped = 1;
 		return rv;
 	}
@@ -3721,10 +4017,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
 
 	bitmap_cond_end_sync(mddev->bitmap, sector_nr);
 
-	pd_idx = stripe_to_pdidx(sector_nr, conf, raid_disks);
-	sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 1);
+	sh = get_active_stripe(conf, sector_nr, 0, 1);
 	if (sh == NULL) {
-		sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 0);
+		sh = get_active_stripe(conf, sector_nr, 0, 0);
 		/* make sure we don't swamp the stripe cache if someone else
 		 * is trying to get access
 		 */
@@ -3766,19 +4061,15 @@ static int  retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
 	 * it will be only one 'dd_idx' and only need one call to raid5_compute_sector.
 	 */
 	struct stripe_head *sh;
-	int dd_idx, pd_idx;
+	int dd_idx;
 	sector_t sector, logical_sector, last_sector;
 	int scnt = 0;
 	int remaining;
 	int handled = 0;
 
 	logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
-	sector = raid5_compute_sector(	logical_sector,
-					conf->raid_disks,
-					conf->raid_disks - conf->max_degraded,
-					&dd_idx,
-					&pd_idx,
-					conf);
+	sector = raid5_compute_sector(conf, logical_sector,
+				      0, &dd_idx, NULL);
 	last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9);
 
 	for (; logical_sector < last_sector;
@@ -3790,7 +4081,7 @@ static int  retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
 			/* already done this stripe */
 			continue;
 
-		sh = get_active_stripe(conf, sector, conf->raid_disks, pd_idx, 1);
+		sh = get_active_stripe(conf, sector, 0, 1);
 
 		if (!sh) {
 			/* failed to get a stripe - must wait */
@@ -3992,89 +4283,69 @@ static struct attribute_group raid5_attrs_group = {
 	.attrs = raid5_attrs,
 };
 
-static int run(mddev_t *mddev)
+static sector_t
+raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
+{
+	raid5_conf_t *conf = mddev_to_conf(mddev);
+
+	if (!sectors)
+		sectors = mddev->dev_sectors;
+	if (!raid_disks) {
+		/* size is defined by the smaller of previous and new size */
+		if (conf->raid_disks < conf->previous_raid_disks)
+			raid_disks = conf->raid_disks;
+		else
+			raid_disks = conf->previous_raid_disks;
+	}
+
+	sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
+	sectors &= ~((sector_t)mddev->new_chunk/512 - 1);
+	return sectors * (raid_disks - conf->max_degraded);
+}
+
+static raid5_conf_t *setup_conf(mddev_t *mddev)
 {
 	raid5_conf_t *conf;
 	int raid_disk, memory;
 	mdk_rdev_t *rdev;
 	struct disk_info *disk;
-	int working_disks = 0;
 
-	if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) {
+	if (mddev->new_level != 5
+	    && mddev->new_level != 4
+	    && mddev->new_level != 6) {
 		printk(KERN_ERR "raid5: %s: raid level not set to 4/5/6 (%d)\n",
-		       mdname(mddev), mddev->level);
-		return -EIO;
+		       mdname(mddev), mddev->new_level);
+		return ERR_PTR(-EIO);
 	}
-
-	if (mddev->chunk_size < PAGE_SIZE) {
-		printk(KERN_ERR "md/raid5: chunk_size must be at least "
-		       "PAGE_SIZE but %d < %ld\n",
-		       mddev->chunk_size, PAGE_SIZE);
-		return -EINVAL;
+	if ((mddev->new_level == 5
+	     && !algorithm_valid_raid5(mddev->new_layout)) ||
+	    (mddev->new_level == 6
+	     && !algorithm_valid_raid6(mddev->new_layout))) {
+		printk(KERN_ERR "raid5: %s: layout %d not supported\n",
+		       mdname(mddev), mddev->new_layout);
+		return ERR_PTR(-EIO);
 	}
-
-	if (mddev->reshape_position != MaxSector) {
-		/* Check that we can continue the reshape.
-		 * Currently only disks can change, it must
-		 * increase, and we must be past the point where
-		 * a stripe over-writes itself
-		 */
-		sector_t here_new, here_old;
-		int old_disks;
-		int max_degraded = (mddev->level == 5 ? 1 : 2);
-
-		if (mddev->new_level != mddev->level ||
-		    mddev->new_layout != mddev->layout ||
-		    mddev->new_chunk != mddev->chunk_size) {
-			printk(KERN_ERR "raid5: %s: unsupported reshape "
-			       "required - aborting.\n",
-			       mdname(mddev));
-			return -EINVAL;
-		}
-		if (mddev->delta_disks <= 0) {
-			printk(KERN_ERR "raid5: %s: unsupported reshape "
-			       "(reduce disks) required - aborting.\n",
-			       mdname(mddev));
-			return -EINVAL;
-		}
-		old_disks = mddev->raid_disks - mddev->delta_disks;
-		/* reshape_position must be on a new-stripe boundary, and one
-		 * further up in new geometry must map after here in old
-		 * geometry.
-		 */
-		here_new = mddev->reshape_position;
-		if (sector_div(here_new, (mddev->chunk_size>>9)*
-			       (mddev->raid_disks - max_degraded))) {
-			printk(KERN_ERR "raid5: reshape_position not "
-			       "on a stripe boundary\n");
-			return -EINVAL;
-		}
-		/* here_new is the stripe we will write to */
-		here_old = mddev->reshape_position;
-		sector_div(here_old, (mddev->chunk_size>>9)*
-			   (old_disks-max_degraded));
-		/* here_old is the first stripe that we might need to read
-		 * from */
-		if (here_new >= here_old) {
-			/* Reading from the same stripe as writing to - bad */
-			printk(KERN_ERR "raid5: reshape_position too early for "
-			       "auto-recovery - aborting.\n");
-			return -EINVAL;
-		}
-		printk(KERN_INFO "raid5: reshape will continue\n");
-		/* OK, we should be able to continue; */
+	if (mddev->new_level == 6 && mddev->raid_disks < 4) {
+		printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n",
+		       mdname(mddev), mddev->raid_disks);
+		return ERR_PTR(-EINVAL);
 	}
 
+	if (!mddev->new_chunk || mddev->new_chunk % PAGE_SIZE) {
+		printk(KERN_ERR "raid5: invalid chunk size %d for %s\n",
+			mddev->new_chunk, mdname(mddev));
+		return ERR_PTR(-EINVAL);
+	}
 
-	mddev->private = kzalloc(sizeof (raid5_conf_t), GFP_KERNEL);
-	if ((conf = mddev->private) == NULL)
+	conf = kzalloc(sizeof(raid5_conf_t), GFP_KERNEL);
+	if (conf == NULL)
 		goto abort;
-	if (mddev->reshape_position == MaxSector) {
-		conf->previous_raid_disks = conf->raid_disks = mddev->raid_disks;
-	} else {
-		conf->raid_disks = mddev->raid_disks;
+
+	conf->raid_disks = mddev->raid_disks;
+	if (mddev->reshape_position == MaxSector)
+		conf->previous_raid_disks = mddev->raid_disks;
+	else
 		conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks;
-	}
 
 	conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info),
 			      GFP_KERNEL);
@@ -4086,13 +4357,12 @@ static int run(mddev_t *mddev)
 	if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
 		goto abort;
 
-	if (mddev->level == 6) {
+	if (mddev->new_level == 6) {
 		conf->spare_page = alloc_page(GFP_KERNEL);
 		if (!conf->spare_page)
 			goto abort;
 	}
 	spin_lock_init(&conf->device_lock);
-	mddev->queue->queue_lock = &conf->device_lock;
 	init_waitqueue_head(&conf->wait_for_stripe);
 	init_waitqueue_head(&conf->wait_for_overlap);
 	INIT_LIST_HEAD(&conf->handle_list);
@@ -4121,47 +4391,134 @@ static int run(mddev_t *mddev)
 			printk(KERN_INFO "raid5: device %s operational as raid"
 				" disk %d\n", bdevname(rdev->bdev,b),
 				raid_disk);
-			working_disks++;
 		} else
 			/* Cannot rely on bitmap to complete recovery */
 			conf->fullsync = 1;
 	}
 
-	/*
-	 * 0 for a fully functional array, 1 or 2 for a degraded array.
-	 */
-	mddev->degraded = conf->raid_disks - working_disks;
-	conf->mddev = mddev;
-	conf->chunk_size = mddev->chunk_size;
-	conf->level = mddev->level;
+	conf->chunk_size = mddev->new_chunk;
+	conf->level = mddev->new_level;
 	if (conf->level == 6)
 		conf->max_degraded = 2;
 	else
 		conf->max_degraded = 1;
-	conf->algorithm = mddev->layout;
+	conf->algorithm = mddev->new_layout;
 	conf->max_nr_stripes = NR_STRIPES;
-	conf->expand_progress = mddev->reshape_position;
-
-	/* device size must be a multiple of chunk size */
-	mddev->size &= ~(mddev->chunk_size/1024 -1);
-	mddev->resync_max_sectors = mddev->size << 1;
+	conf->reshape_progress = mddev->reshape_position;
+	if (conf->reshape_progress != MaxSector) {
+		conf->prev_chunk = mddev->chunk_size;
+		conf->prev_algo = mddev->layout;
+	}
 
-	if (conf->level == 6 && conf->raid_disks < 4) {
-		printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n",
-		       mdname(mddev), conf->raid_disks);
+	memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
+		 conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
+	if (grow_stripes(conf, conf->max_nr_stripes)) {
+		printk(KERN_ERR
+			"raid5: couldn't allocate %dkB for buffers\n", memory);
 		goto abort;
-	}
-	if (!conf->chunk_size || conf->chunk_size % 4) {
-		printk(KERN_ERR "raid5: invalid chunk size %d for %s\n",
-			conf->chunk_size, mdname(mddev));
+	} else
+		printk(KERN_INFO "raid5: allocated %dkB for %s\n",
+			memory, mdname(mddev));
+
+	conf->thread = md_register_thread(raid5d, mddev, "%s_raid5");
+	if (!conf->thread) {
+		printk(KERN_ERR
+		       "raid5: couldn't allocate thread for %s\n",
+		       mdname(mddev));
 		goto abort;
 	}
-	if (conf->algorithm > ALGORITHM_RIGHT_SYMMETRIC) {
-		printk(KERN_ERR 
-			"raid5: unsupported parity algorithm %d for %s\n",
-			conf->algorithm, mdname(mddev));
-		goto abort;
+
+	return conf;
+
+ abort:
+	if (conf) {
+		shrink_stripes(conf);
+		safe_put_page(conf->spare_page);
+		kfree(conf->disks);
+		kfree(conf->stripe_hashtbl);
+		kfree(conf);
+		return ERR_PTR(-EIO);
+	} else
+		return ERR_PTR(-ENOMEM);
+}
+
+static int run(mddev_t *mddev)
+{
+	raid5_conf_t *conf;
+	int working_disks = 0;
+	mdk_rdev_t *rdev;
+
+	if (mddev->reshape_position != MaxSector) {
+		/* Check that we can continue the reshape.
+		 * Currently only disks can change, it must
+		 * increase, and we must be past the point where
+		 * a stripe over-writes itself
+		 */
+		sector_t here_new, here_old;
+		int old_disks;
+		int max_degraded = (mddev->level == 6 ? 2 : 1);
+
+		if (mddev->new_level != mddev->level) {
+			printk(KERN_ERR "raid5: %s: unsupported reshape "
+			       "required - aborting.\n",
+			       mdname(mddev));
+			return -EINVAL;
+		}
+		old_disks = mddev->raid_disks - mddev->delta_disks;
+		/* reshape_position must be on a new-stripe boundary, and one
+		 * further up in new geometry must map after here in old
+		 * geometry.
+		 */
+		here_new = mddev->reshape_position;
+		if (sector_div(here_new, (mddev->new_chunk>>9)*
+			       (mddev->raid_disks - max_degraded))) {
+			printk(KERN_ERR "raid5: reshape_position not "
+			       "on a stripe boundary\n");
+			return -EINVAL;
+		}
+		/* here_new is the stripe we will write to */
+		here_old = mddev->reshape_position;
+		sector_div(here_old, (mddev->chunk_size>>9)*
+			   (old_disks-max_degraded));
+		/* here_old is the first stripe that we might need to read
+		 * from */
+		if (here_new >= here_old) {
+			/* Reading from the same stripe as writing to - bad */
+			printk(KERN_ERR "raid5: reshape_position too early for "
+			       "auto-recovery - aborting.\n");
+			return -EINVAL;
+		}
+		printk(KERN_INFO "raid5: reshape will continue\n");
+		/* OK, we should be able to continue; */
+	} else {
+		BUG_ON(mddev->level != mddev->new_level);
+		BUG_ON(mddev->layout != mddev->new_layout);
+		BUG_ON(mddev->chunk_size != mddev->new_chunk);
+		BUG_ON(mddev->delta_disks != 0);
 	}
+
+	if (mddev->private == NULL)
+		conf = setup_conf(mddev);
+	else
+		conf = mddev->private;
+
+	if (IS_ERR(conf))
+		return PTR_ERR(conf);
+
+	mddev->thread = conf->thread;
+	conf->thread = NULL;
+	mddev->private = conf;
+
+	/*
+	 * 0 for a fully functional array, 1 or 2 for a degraded array.
+	 */
+	list_for_each_entry(rdev, &mddev->disks, same_set)
+		if (rdev->raid_disk >= 0 &&
+		    test_bit(In_sync, &rdev->flags))
+			working_disks++;
+
+	mddev->degraded = conf->raid_disks - working_disks;
+
 	if (mddev->degraded > conf->max_degraded) {
 		printk(KERN_ERR "raid5: not enough operational devices for %s"
 			" (%d/%d failed)\n",
@@ -4169,6 +4526,10 @@ static int run(mddev_t *mddev)
 		goto abort;
 	}
 
+	/* device size must be a multiple of chunk size */
+	mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1);
+	mddev->resync_max_sectors = mddev->dev_sectors;
+
 	if (mddev->degraded > 0 &&
 	    mddev->recovery_cp != MaxSector) {
 		if (mddev->ok_start_degraded)
@@ -4184,43 +4545,22 @@ static int run(mddev_t *mddev)
 		}
 	}
 
-	{
-		mddev->thread = md_register_thread(raid5d, mddev, "%s_raid5");
-		if (!mddev->thread) {
-			printk(KERN_ERR 
-				"raid5: couldn't allocate thread for %s\n",
-				mdname(mddev));
-			goto abort;
-		}
-	}
-	memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
-		 conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
-	if (grow_stripes(conf, conf->max_nr_stripes)) {
-		printk(KERN_ERR 
-			"raid5: couldn't allocate %dkB for buffers\n", memory);
-		shrink_stripes(conf);
-		md_unregister_thread(mddev->thread);
-		goto abort;
-	} else
-		printk(KERN_INFO "raid5: allocated %dkB for %s\n",
-			memory, mdname(mddev));
-
 	if (mddev->degraded == 0)
 		printk("raid5: raid level %d set %s active with %d out of %d"
-			" devices, algorithm %d\n", conf->level, mdname(mddev), 
-			mddev->raid_disks-mddev->degraded, mddev->raid_disks,
-			conf->algorithm);
+		       " devices, algorithm %d\n", conf->level, mdname(mddev),
+		       mddev->raid_disks-mddev->degraded, mddev->raid_disks,
+		       mddev->new_layout);
 	else
 		printk(KERN_ALERT "raid5: raid level %d set %s active with %d"
 			" out of %d devices, algorithm %d\n", conf->level,
 			mdname(mddev), mddev->raid_disks - mddev->degraded,
-			mddev->raid_disks, conf->algorithm);
+			mddev->raid_disks, mddev->new_layout);
 
 	print_raid5_conf(conf);
 
-	if (conf->expand_progress != MaxSector) {
+	if (conf->reshape_progress != MaxSector) {
 		printk("...ok start reshape thread\n");
-		conf->expand_lo = conf->expand_progress;
+		conf->reshape_safe = conf->reshape_progress;
 		atomic_set(&conf->reshape_stripes, 0);
 		clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
 		clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
@@ -4247,18 +4587,22 @@ static int run(mddev_t *mddev)
 		       "raid5: failed to create sysfs attributes for %s\n",
 		       mdname(mddev));
 
+	mddev->queue->queue_lock = &conf->device_lock;
+
 	mddev->queue->unplug_fn = raid5_unplug_device;
 	mddev->queue->backing_dev_info.congested_data = mddev;
 	mddev->queue->backing_dev_info.congested_fn = raid5_congested;
 
-	mddev->array_sectors = 2 * mddev->size * (conf->previous_raid_disks -
-					    conf->max_degraded);
+	md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
 
 	blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
 
 	return 0;
 abort:
+	md_unregister_thread(mddev->thread);
+	mddev->thread = NULL;
 	if (conf) {
+		shrink_stripes(conf);
 		print_raid5_conf(conf);
 		safe_put_page(conf->spare_page);
 		kfree(conf->disks);
@@ -4396,6 +4740,10 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
 	print_raid5_conf(conf);
 	rdev = p->rdev;
 	if (rdev) {
+		if (number >= conf->raid_disks &&
+		    conf->reshape_progress == MaxSector)
+			clear_bit(In_sync, &rdev->flags);
+
 		if (test_bit(In_sync, &rdev->flags) ||
 		    atomic_read(&rdev->nr_pending)) {
 			err = -EBUSY;
@@ -4405,7 +4753,8 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
 		 * isn't possible.
 		 */
 		if (!test_bit(Faulty, &rdev->flags) &&
-		    mddev->degraded <= conf->max_degraded) {
+		    mddev->degraded <= conf->max_degraded &&
+		    number < conf->raid_disks) {
 			err = -EBUSY;
 			goto abort;
 		}
@@ -4472,36 +4821,48 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
 	 * any io in the removed space completes, but it hardly seems
 	 * worth it.
 	 */
-	raid5_conf_t *conf = mddev_to_conf(mddev);
-
 	sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
-	mddev->array_sectors = sectors * (mddev->raid_disks
-					  - conf->max_degraded);
+	md_set_array_sectors(mddev, raid5_size(mddev, sectors,
+					       mddev->raid_disks));
+	if (mddev->array_sectors >
+	    raid5_size(mddev, sectors, mddev->raid_disks))
+		return -EINVAL;
 	set_capacity(mddev->gendisk, mddev->array_sectors);
 	mddev->changed = 1;
-	if (sectors/2  > mddev->size && mddev->recovery_cp == MaxSector) {
-		mddev->recovery_cp = mddev->size << 1;
+	if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) {
+		mddev->recovery_cp = mddev->dev_sectors;
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	}
-	mddev->size = sectors /2;
+	mddev->dev_sectors = sectors;
 	mddev->resync_max_sectors = sectors;
 	return 0;
 }
 
-#ifdef CONFIG_MD_RAID5_RESHAPE
 static int raid5_check_reshape(mddev_t *mddev)
 {
 	raid5_conf_t *conf = mddev_to_conf(mddev);
-	int err;
 
-	if (mddev->delta_disks < 0 ||
-	    mddev->new_level != mddev->level)
-		return -EINVAL; /* Cannot shrink array or change level yet */
-	if (mddev->delta_disks == 0)
-		return 0; /* nothing to do */
+	if (mddev->delta_disks == 0 &&
+	    mddev->new_layout == mddev->layout &&
+	    mddev->new_chunk == mddev->chunk_size)
+		return -EINVAL; /* nothing to do */
 	if (mddev->bitmap)
 		/* Cannot grow a bitmap yet */
 		return -EBUSY;
+	if (mddev->degraded > conf->max_degraded)
+		return -EINVAL;
+	if (mddev->delta_disks < 0) {
+		/* We might be able to shrink, but the devices must
+		 * be made bigger first.
+		 * For raid6, 4 is the minimum size.
+		 * Otherwise 2 is the minimum
+		 */
+		int min = 2;
+		if (mddev->level == 6)
+			min = 4;
+		if (mddev->raid_disks + mddev->delta_disks < min)
+			return -EINVAL;
+	}
 
 	/* Can only proceed if there are plenty of stripe_heads.
 	 * We need a minimum of one full stripe,, and for sensible progress
@@ -4514,18 +4875,12 @@ static int raid5_check_reshape(mddev_t *mddev)
 	if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes ||
 	    (mddev->new_chunk / STRIPE_SIZE) * 4 > conf->max_nr_stripes) {
 		printk(KERN_WARNING "raid5: reshape: not enough stripes.  Needed %lu\n",
-		       (mddev->chunk_size / STRIPE_SIZE)*4);
+		       (max(mddev->chunk_size, mddev->new_chunk)
+			/ STRIPE_SIZE)*4);
 		return -ENOSPC;
 	}
 
-	err = resize_stripes(conf, conf->raid_disks + mddev->delta_disks);
-	if (err)
-		return err;
-
-	if (mddev->degraded > conf->max_degraded)
-		return -EINVAL;
-	/* looks like we might be able to manage this */
-	return 0;
+	return resize_stripes(conf, conf->raid_disks + mddev->delta_disks);
 }
 
 static int raid5_start_reshape(mddev_t *mddev)
@@ -4550,12 +4905,31 @@ static int raid5_start_reshape(mddev_t *mddev)
 		 */
 		return -EINVAL;
 
+	/* Refuse to reduce size of the array.  Any reductions in
+	 * array size must be through explicit setting of array_size
+	 * attribute.
+	 */
+	if (raid5_size(mddev, 0, conf->raid_disks + mddev->delta_disks)
+	    < mddev->array_sectors) {
+		printk(KERN_ERR "md: %s: array size must be reduced "
+		       "before number of disks\n", mdname(mddev));
+		return -EINVAL;
+	}
+
 	atomic_set(&conf->reshape_stripes, 0);
 	spin_lock_irq(&conf->device_lock);
 	conf->previous_raid_disks = conf->raid_disks;
 	conf->raid_disks += mddev->delta_disks;
-	conf->expand_progress = 0;
-	conf->expand_lo = 0;
+	conf->prev_chunk = conf->chunk_size;
+	conf->chunk_size = mddev->new_chunk;
+	conf->prev_algo = conf->algorithm;
+	conf->algorithm = mddev->new_layout;
+	if (mddev->delta_disks < 0)
+		conf->reshape_progress = raid5_size(mddev, 0, 0);
+	else
+		conf->reshape_progress = 0;
+	conf->reshape_safe = conf->reshape_progress;
+	conf->generation++;
 	spin_unlock_irq(&conf->device_lock);
 
 	/* Add some new drives, as many as will fit.
@@ -4580,9 +4954,12 @@ static int raid5_start_reshape(mddev_t *mddev)
 				break;
 		}
 
-	spin_lock_irqsave(&conf->device_lock, flags);
-	mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) - added_devices;
-	spin_unlock_irqrestore(&conf->device_lock, flags);
+	if (mddev->delta_disks > 0) {
+		spin_lock_irqsave(&conf->device_lock, flags);
+		mddev->degraded = (conf->raid_disks - conf->previous_raid_disks)
+			- added_devices;
+		spin_unlock_irqrestore(&conf->device_lock, flags);
+	}
 	mddev->raid_disks = conf->raid_disks;
 	mddev->reshape_position = 0;
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -4597,52 +4974,86 @@ static int raid5_start_reshape(mddev_t *mddev)
 		mddev->recovery = 0;
 		spin_lock_irq(&conf->device_lock);
 		mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
-		conf->expand_progress = MaxSector;
+		conf->reshape_progress = MaxSector;
 		spin_unlock_irq(&conf->device_lock);
 		return -EAGAIN;
 	}
+	conf->reshape_checkpoint = jiffies;
 	md_wakeup_thread(mddev->sync_thread);
 	md_new_event(mddev);
 	return 0;
 }
-#endif
 
+/* This is called from the reshape thread and should make any
+ * changes needed in 'conf'
+ */
 static void end_reshape(raid5_conf_t *conf)
 {
-	struct block_device *bdev;
 
 	if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
-		conf->mddev->array_sectors = 2 * conf->mddev->size *
-			(conf->raid_disks - conf->max_degraded);
-		set_capacity(conf->mddev->gendisk, conf->mddev->array_sectors);
-		conf->mddev->changed = 1;
-
-		bdev = bdget_disk(conf->mddev->gendisk, 0);
-		if (bdev) {
-			mutex_lock(&bdev->bd_inode->i_mutex);
-			i_size_write(bdev->bd_inode,
-				     (loff_t)conf->mddev->array_sectors << 9);
-			mutex_unlock(&bdev->bd_inode->i_mutex);
-			bdput(bdev);
-		}
+
 		spin_lock_irq(&conf->device_lock);
-		conf->expand_progress = MaxSector;
+		conf->previous_raid_disks = conf->raid_disks;
+		conf->reshape_progress = MaxSector;
 		spin_unlock_irq(&conf->device_lock);
-		conf->mddev->reshape_position = MaxSector;
+		wake_up(&conf->wait_for_overlap);
 
 		/* read-ahead size must cover two whole stripes, which is
 		 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
 		 */
 		{
-			int data_disks = conf->previous_raid_disks - conf->max_degraded;
-			int stripe = data_disks *
-				(conf->mddev->chunk_size / PAGE_SIZE);
+			int data_disks = conf->raid_disks - conf->max_degraded;
+			int stripe = data_disks * (conf->chunk_size
+						   / PAGE_SIZE);
 			if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
 				conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
 		}
 	}
 }
 
+/* This is called from the raid5d thread with mddev_lock held.
+ * It makes config changes to the device.
+ */
+static void raid5_finish_reshape(mddev_t *mddev)
+{
+	struct block_device *bdev;
+	raid5_conf_t *conf = mddev_to_conf(mddev);
+
+	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+
+		if (mddev->delta_disks > 0) {
+			md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
+			set_capacity(mddev->gendisk, mddev->array_sectors);
+			mddev->changed = 1;
+
+			bdev = bdget_disk(mddev->gendisk, 0);
+			if (bdev) {
+				mutex_lock(&bdev->bd_inode->i_mutex);
+				i_size_write(bdev->bd_inode,
+					     (loff_t)mddev->array_sectors << 9);
+				mutex_unlock(&bdev->bd_inode->i_mutex);
+				bdput(bdev);
+			}
+		} else {
+			int d;
+			mddev->degraded = conf->raid_disks;
+			for (d = 0; d < conf->raid_disks ; d++)
+				if (conf->disks[d].rdev &&
+				    test_bit(In_sync,
+					     &conf->disks[d].rdev->flags))
+					mddev->degraded--;
+			for (d = conf->raid_disks ;
+			     d < conf->raid_disks - mddev->delta_disks;
+			     d++)
+				raid5_remove_disk(mddev, d);
+		}
+		mddev->layout = conf->algorithm;
+		mddev->chunk_size = conf->chunk_size;
+		mddev->reshape_position = MaxSector;
+		mddev->delta_disks = 0;
+	}
+}
+
 static void raid5_quiesce(mddev_t *mddev, int state)
 {
 	raid5_conf_t *conf = mddev_to_conf(mddev);
@@ -4672,6 +5083,212 @@ static void raid5_quiesce(mddev_t *mddev, int state)
 	}
 }
 
+
+static void *raid5_takeover_raid1(mddev_t *mddev)
+{
+	int chunksect;
+
+	if (mddev->raid_disks != 2 ||
+	    mddev->degraded > 1)
+		return ERR_PTR(-EINVAL);
+
+	/* Should check if there are write-behind devices? */
+
+	chunksect = 64*2; /* 64K by default */
+
+	/* The array must be an exact multiple of chunksize */
+	while (chunksect && (mddev->array_sectors & (chunksect-1)))
+		chunksect >>= 1;
+
+	if ((chunksect<<9) < STRIPE_SIZE)
+		/* array size does not allow a suitable chunk size */
+		return ERR_PTR(-EINVAL);
+
+	mddev->new_level = 5;
+	mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC;
+	mddev->new_chunk = chunksect << 9;
+
+	return setup_conf(mddev);
+}
+
+static void *raid5_takeover_raid6(mddev_t *mddev)
+{
+	int new_layout;
+
+	switch (mddev->layout) {
+	case ALGORITHM_LEFT_ASYMMETRIC_6:
+		new_layout = ALGORITHM_LEFT_ASYMMETRIC;
+		break;
+	case ALGORITHM_RIGHT_ASYMMETRIC_6:
+		new_layout = ALGORITHM_RIGHT_ASYMMETRIC;
+		break;
+	case ALGORITHM_LEFT_SYMMETRIC_6:
+		new_layout = ALGORITHM_LEFT_SYMMETRIC;
+		break;
+	case ALGORITHM_RIGHT_SYMMETRIC_6:
+		new_layout = ALGORITHM_RIGHT_SYMMETRIC;
+		break;
+	case ALGORITHM_PARITY_0_6:
+		new_layout = ALGORITHM_PARITY_0;
+		break;
+	case ALGORITHM_PARITY_N:
+		new_layout = ALGORITHM_PARITY_N;
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+	mddev->new_level = 5;
+	mddev->new_layout = new_layout;
+	mddev->delta_disks = -1;
+	mddev->raid_disks -= 1;
+	return setup_conf(mddev);
+}
+
+
+static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
+{
+	/* For a 2-drive array, the layout and chunk size can be changed
+	 * immediately as no restriping is needed.
+	 * For larger arrays we record the new value - after validation
+	 * to be used by a reshape pass.
+	 */
+	raid5_conf_t *conf = mddev_to_conf(mddev);
+
+	if (new_layout >= 0 && !algorithm_valid_raid5(new_layout))
+		return -EINVAL;
+	if (new_chunk > 0) {
+		if (new_chunk & (new_chunk-1))
+			/* not a power of 2 */
+			return -EINVAL;
+		if (new_chunk < PAGE_SIZE)
+			return -EINVAL;
+		if (mddev->array_sectors & ((new_chunk>>9)-1))
+			/* not factor of array size */
+			return -EINVAL;
+	}
+
+	/* They look valid */
+
+	if (mddev->raid_disks == 2) {
+
+		if (new_layout >= 0) {
+			conf->algorithm = new_layout;
+			mddev->layout = mddev->new_layout = new_layout;
+		}
+		if (new_chunk > 0) {
+			conf->chunk_size = new_chunk;
+			mddev->chunk_size = mddev->new_chunk = new_chunk;
+		}
+		set_bit(MD_CHANGE_DEVS, &mddev->flags);
+		md_wakeup_thread(mddev->thread);
+	} else {
+		if (new_layout >= 0)
+			mddev->new_layout = new_layout;
+		if (new_chunk > 0)
+			mddev->new_chunk = new_chunk;
+	}
+	return 0;
+}
+
+static int raid6_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
+{
+	if (new_layout >= 0 && !algorithm_valid_raid6(new_layout))
+		return -EINVAL;
+	if (new_chunk > 0) {
+		if (new_chunk & (new_chunk-1))
+			/* not a power of 2 */
+			return -EINVAL;
+		if (new_chunk < PAGE_SIZE)
+			return -EINVAL;
+		if (mddev->array_sectors & ((new_chunk>>9)-1))
+			/* not factor of array size */
+			return -EINVAL;
+	}
+
+	/* They look valid */
+
+	if (new_layout >= 0)
+		mddev->new_layout = new_layout;
+	if (new_chunk > 0)
+		mddev->new_chunk = new_chunk;
+
+	return 0;
+}
+
+static void *raid5_takeover(mddev_t *mddev)
+{
+	/* raid5 can take over:
+	 *  raid0 - if all devices are the same - make it a raid4 layout
+	 *  raid1 - if there are two drives.  We need to know the chunk size
+	 *  raid4 - trivial - just use a raid4 layout.
+	 *  raid6 - Providing it is a *_6 layout
+	 *
+	 * For now, raid1, raid4 and raid6 are handled; raid0 is not.
+	 */
+
+	if (mddev->level == 1)
+		return raid5_takeover_raid1(mddev);
+	if (mddev->level == 4) {
+		mddev->new_layout = ALGORITHM_PARITY_N;
+		mddev->new_level = 5;
+		return setup_conf(mddev);
+	}
+	if (mddev->level == 6)
+		return raid5_takeover_raid6(mddev);
+
+	return ERR_PTR(-EINVAL);
+}
+
+
+static struct mdk_personality raid5_personality;
+
+static void *raid6_takeover(mddev_t *mddev)
+{
+	/* Currently can only take over a raid5.  We map the
+	 * personality to an equivalent raid6 personality
+	 * with the Q block at the end.
+	 */
+	int new_layout;
+
+	if (mddev->pers != &raid5_personality)
+		return ERR_PTR(-EINVAL);
+	if (mddev->degraded > 1)
+		return ERR_PTR(-EINVAL);
+	if (mddev->raid_disks > 253)
+		return ERR_PTR(-EINVAL);
+	if (mddev->raid_disks < 3)
+		return ERR_PTR(-EINVAL);
+
+	switch (mddev->layout) {
+	case ALGORITHM_LEFT_ASYMMETRIC:
+		new_layout = ALGORITHM_LEFT_ASYMMETRIC_6;
+		break;
+	case ALGORITHM_RIGHT_ASYMMETRIC:
+		new_layout = ALGORITHM_RIGHT_ASYMMETRIC_6;
+		break;
+	case ALGORITHM_LEFT_SYMMETRIC:
+		new_layout = ALGORITHM_LEFT_SYMMETRIC_6;
+		break;
+	case ALGORITHM_RIGHT_SYMMETRIC:
+		new_layout = ALGORITHM_RIGHT_SYMMETRIC_6;
+		break;
+	case ALGORITHM_PARITY_0:
+		new_layout = ALGORITHM_PARITY_0_6;
+		break;
+	case ALGORITHM_PARITY_N:
+		new_layout = ALGORITHM_PARITY_N;
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+	mddev->new_level = 6;
+	mddev->new_layout = new_layout;
+	mddev->delta_disks = 1;
+	mddev->raid_disks += 1;
+	return setup_conf(mddev);
+}
+
+
 static struct mdk_personality raid6_personality =
 {
 	.name		= "raid6",
@@ -4687,11 +5304,13 @@ static struct mdk_personality raid6_personality =
 	.spare_active	= raid5_spare_active,
 	.sync_request	= sync_request,
 	.resize		= raid5_resize,
-#ifdef CONFIG_MD_RAID5_RESHAPE
+	.size		= raid5_size,
 	.check_reshape	= raid5_check_reshape,
 	.start_reshape  = raid5_start_reshape,
-#endif
+	.finish_reshape = raid5_finish_reshape,
 	.quiesce	= raid5_quiesce,
+	.takeover	= raid6_takeover,
+	.reconfig	= raid6_reconfig,
 };
 static struct mdk_personality raid5_personality =
 {
@@ -4708,11 +5327,13 @@ static struct mdk_personality raid5_personality =
 	.spare_active	= raid5_spare_active,
 	.sync_request	= sync_request,
 	.resize		= raid5_resize,
-#ifdef CONFIG_MD_RAID5_RESHAPE
+	.size		= raid5_size,
 	.check_reshape	= raid5_check_reshape,
 	.start_reshape  = raid5_start_reshape,
-#endif
+	.finish_reshape = raid5_finish_reshape,
 	.quiesce	= raid5_quiesce,
+	.takeover	= raid5_takeover,
+	.reconfig	= raid5_reconfig,
 };
 
 static struct mdk_personality raid4_personality =
@@ -4730,20 +5351,15 @@ static struct mdk_personality raid4_personality =
 	.spare_active	= raid5_spare_active,
 	.sync_request	= sync_request,
 	.resize		= raid5_resize,
-#ifdef CONFIG_MD_RAID5_RESHAPE
+	.size		= raid5_size,
 	.check_reshape	= raid5_check_reshape,
 	.start_reshape  = raid5_start_reshape,
-#endif
+	.finish_reshape = raid5_finish_reshape,
 	.quiesce	= raid5_quiesce,
 };
 
 static int __init raid5_init(void)
 {
-	int e;
-
-	e = raid6_select_algo();
-	if ( e )
-		return e;
 	register_md_personality(&raid6_personality);
 	register_md_personality(&raid5_personality);
 	register_md_personality(&raid4_personality);
diff --git a/include/linux/raid/raid5.h b/drivers/md/raid5.h
index 3b26727..52ba999 100644
--- a/include/linux/raid/raid5.h
+++ b/drivers/md/raid5.h
@@ -1,7 +1,6 @@
 #ifndef _RAID5_H
 #define _RAID5_H
 
-#include <linux/raid/md.h>
 #include <linux/raid/xor.h>
 
 /*
@@ -197,15 +196,19 @@ enum reconstruct_states {
 
 struct stripe_head {
 	struct hlist_node	hash;
-	struct list_head	lru;			/* inactive_list or handle_list */
-	struct raid5_private_data	*raid_conf;
-	sector_t		sector;			/* sector of this row */
-	int			pd_idx;			/* parity disk index */
-	unsigned long		state;			/* state flags */
-	atomic_t		count;			/* nr of active thread/requests */
+	struct list_head	lru;	      /* inactive_list or handle_list */
+	struct raid5_private_data *raid_conf;
+	short			generation;	/* increments with every
+						 * reshape */
+	sector_t		sector;		/* sector of this row */
+	short			pd_idx;		/* parity disk index */
+	short			qd_idx;		/* 'Q' disk index for raid6 */
+	short			ddf_layout;/* use DDF ordering to calculate Q */
+	unsigned long		state;		/* state flags */
+	atomic_t		count;	      /* nr of active thread/requests */
 	spinlock_t		lock;
 	int			bm_seq;	/* sequence number for bitmap flushes */
-	int			disks;			/* disks in stripe */
+	int			disks;		/* disks in stripe */
 	enum check_states	check_state;
 	enum reconstruct_states reconstruct_state;
 	/* stripe_operations
@@ -238,7 +241,7 @@ struct stripe_head_state {
 
 /* r6_state - extra state data only relevant to r6 */
 struct r6_state {
-	int p_failed, q_failed, qd_idx, failed_num[2];
+	int p_failed, q_failed, failed_num[2];
 };
 
 /* Flags */
@@ -268,6 +271,8 @@ struct r6_state {
 #define READ_MODIFY_WRITE	2
 /* not a write method, but a compute_parity mode */
 #define	CHECK_PARITY		3
+/* Additional compute_parity mode -- updates the parity w/o LOCKING */
+#define UPDATE_PARITY		4
 
 /*
  * Stripe state
@@ -319,7 +324,7 @@ struct r6_state {
  * PREREAD_ACTIVE is set, else we set DELAYED which will send it to the delayed queue.
  * HANDLE gets cleared if stripe_handle leave nothing locked.
  */
- 
+
 
 struct disk_info {
 	mdk_rdev_t	*rdev;
@@ -334,12 +339,21 @@ struct raid5_private_data {
 	int			raid_disks;
 	int			max_nr_stripes;
 
-	/* used during an expand */
-	sector_t		expand_progress;	/* MaxSector when no expand happening */
-	sector_t		expand_lo; /* from here up to expand_progress it out-of-bounds
-					    * as we haven't flushed the metadata yet
-					    */
+	/* reshape_progress is the leading edge of a 'reshape'
+	 * It has value MaxSector when no reshape is happening
+	 * If delta_disks < 0, it is the last sector we started work on,
+	 * else it is the next sector to work on.
+	 */
+	sector_t		reshape_progress;
+	/* reshape_safe is the trailing edge of a reshape.  We know that
+	 * before (or after) this address, all reshape has completed.
+	 */
+	sector_t		reshape_safe;
 	int			previous_raid_disks;
+	int			prev_chunk, prev_algo;
+	short			generation; /* increments with every reshape */
+	unsigned long		reshape_checkpoint; /* Time we last updated
+						     * metadata */
 
 	struct list_head	handle_list; /* stripes needing handling */
 	struct list_head	hold_list; /* preread ready stripes */
@@ -385,6 +399,11 @@ struct raid5_private_data {
 	int			pool_size; /* number of disks in stripeheads in pool */
 	spinlock_t		device_lock;
 	struct disk_info	*disks;
+
+	/* When taking over an array from a different personality, we store
+	 * the new thread here until we fully activate the array.
+	 */
+	struct mdk_thread_s	*thread;
 };
 
 typedef struct raid5_private_data raid5_conf_t;
@@ -394,9 +413,62 @@ typedef struct raid5_private_data raid5_conf_t;
 /*
  * Our supported algorithms
  */
-#define ALGORITHM_LEFT_ASYMMETRIC	0
-#define ALGORITHM_RIGHT_ASYMMETRIC	1
-#define ALGORITHM_LEFT_SYMMETRIC	2
-#define ALGORITHM_RIGHT_SYMMETRIC	3
+#define ALGORITHM_LEFT_ASYMMETRIC	0 /* Rotating Parity N with Data Restart */
+#define ALGORITHM_RIGHT_ASYMMETRIC	1 /* Rotating Parity 0 with Data Restart */
+#define ALGORITHM_LEFT_SYMMETRIC	2 /* Rotating Parity N with Data Continuation */
+#define ALGORITHM_RIGHT_SYMMETRIC	3 /* Rotating Parity 0 with Data Continuation */
+
+/* Define non-rotating (raid4) algorithms.  These allow
+ * conversion of raid4 to raid5.
+ */
+#define ALGORITHM_PARITY_0		4 /* P or P,Q are initial devices */
+#define ALGORITHM_PARITY_N		5 /* P or P,Q are final devices. */
+
+/* DDF RAID6 layouts differ from md/raid6 layouts in two ways.
+ * Firstly, the exact positioning of the parity block is slightly
+ * different between the 'LEFT_*' modes of md and the "_N_*" modes
+ * of DDF.
+ * Secondly, the order of datablocks over which the Q syndrome is computed
+ * is different.
+ * Consequently we have different layouts for DDF/raid6 than md/raid6.
+ * These layouts are from the DDFv1.2 spec.
+ * Interestingly DDFv1.2-Errata-A does not specify N_CONTINUE but
+ * leaves RLQ=3 as 'Vendor Specific'
+ */
+
+#define ALGORITHM_ROTATING_ZERO_RESTART	8 /* DDF PRL=6 RLQ=1 */
+#define ALGORITHM_ROTATING_N_RESTART	9 /* DDF PRL=6 RLQ=2 */
+#define ALGORITHM_ROTATING_N_CONTINUE	10 /*DDF PRL=6 RLQ=3 */
+
+
+/* For every RAID5 algorithm we define a RAID6 algorithm
+ * with exactly the same layout for data and parity, and
+ * with the Q block always on the last device (N-1).
+ * This allows trivial conversion from RAID5 to RAID6
+ */
+#define ALGORITHM_LEFT_ASYMMETRIC_6	16
+#define ALGORITHM_RIGHT_ASYMMETRIC_6	17
+#define ALGORITHM_LEFT_SYMMETRIC_6	18
+#define ALGORITHM_RIGHT_SYMMETRIC_6	19
+#define ALGORITHM_PARITY_0_6		20
+#define ALGORITHM_PARITY_N_6		ALGORITHM_PARITY_N
+
+static inline int algorithm_valid_raid5(int layout)
+{
+	return (layout >= 0) &&
+		(layout <= 5);
+}
+static inline int algorithm_valid_raid6(int layout)
+{
+	return (layout >= 0 && layout <= 5)
+		||
+		(layout == 8 || layout == 10)
+		||
+		(layout >= 16 && layout <= 20);
+}
 
+static inline int algorithm_is_DDF(int layout)
+{
+	return layout >= 8 && layout <= 10;
+}
 #endif
diff --git a/drivers/md/raid6algos.c b/drivers/md/raid6algos.c
index 21987e3..866215a 100644
--- a/drivers/md/raid6algos.c
+++ b/drivers/md/raid6algos.c
@@ -5,7 +5,7 @@
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
  *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- *   Bostom MA 02111-1307, USA; either version 2 of the License, or
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
  *   (at your option) any later version; incorporated herein by reference.
  *
  * ----------------------------------------------------------------------- */
@@ -16,13 +16,20 @@
  * Algorithm list and algorithm selection for RAID-6
  */
 
-#include "raid6.h"
+#include <linux/raid/pq.h>
 #ifndef __KERNEL__
 #include <sys/mman.h>
 #include <stdio.h>
+#else
+#if !RAID6_USE_EMPTY_ZERO_PAGE
+/* In .bss so it's zeroed */
+const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
+EXPORT_SYMBOL(raid6_empty_zero_page);
+#endif
 #endif
 
 struct raid6_calls raid6_call;
+EXPORT_SYMBOL_GPL(raid6_call);
 
 /* Various routine sets */
 extern const struct raid6_calls raid6_intx1;
@@ -79,6 +86,7 @@ const struct raid6_calls * const raid6_algos[] = {
 #else
 /* Need more time to be stable in userspace */
 #define RAID6_TIME_JIFFIES_LG2	9
+#define time_before(x, y) ((x) < (y))
 #endif
 
 /* Try to pick the best algorithm */
@@ -152,3 +160,12 @@ int __init raid6_select_algo(void)
 
 	return best ? 0 : -EINVAL;
 }
+
+static void raid6_exit(void)
+{
+	do { } while (0);
+}
+
+subsys_initcall(raid6_select_algo);
+module_exit(raid6_exit);
+MODULE_LICENSE("GPL");
diff --git a/drivers/md/raid6altivec.uc b/drivers/md/raid6altivec.uc
index b9afd35..699dfee 100644
--- a/drivers/md/raid6altivec.uc
+++ b/drivers/md/raid6altivec.uc
@@ -5,7 +5,7 @@
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
  *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- *   Bostom MA 02111-1307, USA; either version 2 of the License, or
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
  *   (at your option) any later version; incorporated herein by reference.
  *
  * ----------------------------------------------------------------------- */
@@ -22,7 +22,7 @@
  * bracked this with preempt_disable/enable or in a lock)
  */
 
-#include "raid6.h"
+#include <linux/raid/pq.h>
 
 #ifdef CONFIG_ALTIVEC
 
diff --git a/drivers/md/raid6int.uc b/drivers/md/raid6int.uc
index ad004ce..f9bf9cb 100644
--- a/drivers/md/raid6int.uc
+++ b/drivers/md/raid6int.uc
@@ -5,7 +5,7 @@
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
  *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- *   Bostom MA 02111-1307, USA; either version 2 of the License, or
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
  *   (at your option) any later version; incorporated herein by reference.
  *
  * ----------------------------------------------------------------------- */
@@ -18,7 +18,7 @@
  * This file is postprocessed using unroll.pl
  */
 
-#include "raid6.h"
+#include <linux/raid/pq.h>
 
 /*
  * This is the C data type to use
diff --git a/drivers/md/raid6mmx.c b/drivers/md/raid6mmx.c
index d4e4a1b..e7f6c13 100644
--- a/drivers/md/raid6mmx.c
+++ b/drivers/md/raid6mmx.c
@@ -5,7 +5,7 @@
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
  *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- *   Bostom MA 02111-1307, USA; either version 2 of the License, or
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
  *   (at your option) any later version; incorporated herein by reference.
  *
  * ----------------------------------------------------------------------- */
@@ -18,7 +18,7 @@
 
 #if defined(__i386__) && !defined(__arch_um__)
 
-#include "raid6.h"
+#include <linux/raid/pq.h>
 #include "raid6x86.h"
 
 /* Shared with raid6sse1.c */
diff --git a/drivers/md/raid6recov.c b/drivers/md/raid6recov.c
index a8c4d94..2609f00 100644
--- a/drivers/md/raid6recov.c
+++ b/drivers/md/raid6recov.c
@@ -5,7 +5,7 @@
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
  *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- *   Bostom MA 02111-1307, USA; either version 2 of the License, or
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
  *   (at your option) any later version; incorporated herein by reference.
  *
  * ----------------------------------------------------------------------- */
@@ -18,7 +18,7 @@
  * the syndrome.)
  */
 
-#include "raid6.h"
+#include <linux/raid/pq.h>
 
 /* Recover two failed data blocks. */
 void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
@@ -63,9 +63,7 @@ void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
 		p++; q++;
 	}
 }
-
-
-
+EXPORT_SYMBOL_GPL(raid6_2data_recov);
 
 /* Recover failure of one data block plus the P block */
 void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
@@ -97,9 +95,10 @@ void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
 		q++; dq++;
 	}
 }
+EXPORT_SYMBOL_GPL(raid6_datap_recov);
 
-
-#ifndef __KERNEL__		/* Testing only */
+#ifndef __KERNEL__
+/* Testing only */
 
 /* Recover two failed blocks. */
 void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs)
diff --git a/drivers/md/raid6sse1.c b/drivers/md/raid6sse1.c
index 0666237..b274dd5 100644
--- a/drivers/md/raid6sse1.c
+++ b/drivers/md/raid6sse1.c
@@ -5,7 +5,7 @@
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
  *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- *   Bostom MA 02111-1307, USA; either version 2 of the License, or
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
  *   (at your option) any later version; incorporated herein by reference.
  *
  * ----------------------------------------------------------------------- */
@@ -23,7 +23,7 @@
 
 #if defined(__i386__) && !defined(__arch_um__)
 
-#include "raid6.h"
+#include <linux/raid/pq.h>
 #include "raid6x86.h"
 
 /* Defined in raid6mmx.c */
diff --git a/drivers/md/raid6sse2.c b/drivers/md/raid6sse2.c
index b034ad8..6ed6c6c 100644
--- a/drivers/md/raid6sse2.c
+++ b/drivers/md/raid6sse2.c
@@ -5,7 +5,7 @@
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
  *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- *   Bostom MA 02111-1307, USA; either version 2 of the License, or
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
  *   (at your option) any later version; incorporated herein by reference.
  *
  * ----------------------------------------------------------------------- */
@@ -19,7 +19,7 @@
 
 #if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
 
-#include "raid6.h"
+#include <linux/raid/pq.h>
 #include "raid6x86.h"
 
 static const struct raid6_sse_constants {
diff --git a/drivers/md/raid6test/Makefile b/drivers/md/raid6test/Makefile
index 78e0396..58ffdf4 100644
--- a/drivers/md/raid6test/Makefile
+++ b/drivers/md/raid6test/Makefile
@@ -5,7 +5,7 @@
 
 CC	 = gcc
 OPTFLAGS = -O2			# Adjust as desired
-CFLAGS	 = -I.. -g $(OPTFLAGS)
+CFLAGS	 = -I.. -I ../../../include -g $(OPTFLAGS)
 LD	 = ld
 PERL	 = perl
 AR	 = ar
diff --git a/drivers/md/raid6test/test.c b/drivers/md/raid6test/test.c
index 559cc41..7a93031 100644
--- a/drivers/md/raid6test/test.c
+++ b/drivers/md/raid6test/test.c
@@ -17,7 +17,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
-#include "raid6.h"
+#include <linux/raid/pq.h>
 
 #define NDISKS		16	/* Including P and Q */
 
diff --git a/drivers/md/raid6x86.h b/drivers/md/raid6x86.h
index 99fea7a..4c22c15 100644
--- a/drivers/md/raid6x86.h
+++ b/drivers/md/raid6x86.h
@@ -5,7 +5,7 @@
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
  *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- *   Bostom MA 02111-1307, USA; either version 2 of the License, or
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
  *   (at your option) any later version; incorporated herein by reference.
  *
  * ----------------------------------------------------------------------- */
diff --git a/drivers/mtd/maps/pxa2xx-flash.c b/drivers/mtd/maps/pxa2xx-flash.c
index e9026cb..572d32f 100644
--- a/drivers/mtd/maps/pxa2xx-flash.c
+++ b/drivers/mtd/maps/pxa2xx-flash.c
@@ -117,7 +117,7 @@ static int __init pxa2xx_flash_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int __exit pxa2xx_flash_remove(struct platform_device *dev)
+static int __devexit pxa2xx_flash_remove(struct platform_device *dev)
 {
 	struct pxa2xx_flash_info *info = platform_get_drvdata(dev);
 
diff --git a/drivers/parisc/asp.c b/drivers/parisc/asp.c
index 79311335..9ca2109 100644
--- a/drivers/parisc/asp.c
+++ b/drivers/parisc/asp.c
@@ -81,7 +81,7 @@ static int __init asp_init_chip(struct parisc_device *dev)
 	asp.hpa = ASP_INTERRUPT_ADDR;
 
 	printk(KERN_INFO "%s version %d at 0x%lx found.\n", 
-		asp.name, asp.version, dev->hpa.start);
+		asp.name, asp.version, (unsigned long)dev->hpa.start);
 
 	/* the IRQ ASP should use */
 	ret = -EBUSY;
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index cd4dd7e..5d610cb 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -406,8 +406,6 @@ resource_found:
 	}
 	ioc->avg_search[ioc->avg_idx++] = cr_start;
 	ioc->avg_idx &= CCIO_SEARCH_SAMPLE - 1;
-#endif
-#ifdef CCIO_COLLECT_STATS
 	ioc->used_pages += pages_needed;
 #endif
 	/* 
@@ -453,10 +451,10 @@ ccio_free_range(struct ioc *ioc, dma_addr_t iova, unsigned long pages_mapped)
 		unsigned long mask = ~(~0UL >> pages_mapped);
 		CCIO_FREE_MAPPINGS(ioc, res_idx, mask, 8);
 #else
-		CCIO_FREE_MAPPINGS(ioc, res_idx, 0xff, 8);
+		CCIO_FREE_MAPPINGS(ioc, res_idx, 0xffUL, 8);
 #endif
 	} else if(pages_mapped <= 16) {
-		CCIO_FREE_MAPPINGS(ioc, res_idx, 0xffff, 16);
+		CCIO_FREE_MAPPINGS(ioc, res_idx, 0xffffUL, 16);
 	} else if(pages_mapped <= 32) {
 		CCIO_FREE_MAPPINGS(ioc, res_idx, ~(unsigned int)0, 32);
 #ifdef __LP64__
@@ -1028,8 +1026,10 @@ static int ccio_proc_info(struct seq_file *m, void *p)
 
 	while (ioc != NULL) {
 		unsigned int total_pages = ioc->res_size << 3;
+#ifdef CCIO_COLLECT_STATS
 		unsigned long avg = 0, min, max;
 		int j;
+#endif
 
 		len += seq_printf(m, "%s\n", ioc->name);
 		
@@ -1060,8 +1060,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
 		avg /= CCIO_SEARCH_SAMPLE;
 		len += seq_printf(m, "  Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles)\n",
 				  min, avg, max);
-#endif
-#ifdef CCIO_COLLECT_STATS
+
 		len += seq_printf(m, "pci_map_single(): %8ld calls  %8ld pages (avg %d/1000)\n",
 				  ioc->msingle_calls, ioc->msingle_pages,
 				  (int)((ioc->msingle_pages * 1000)/ioc->msingle_calls));
@@ -1400,7 +1399,7 @@ ccio_init_resource(struct resource *res, char *name, void __iomem *ioaddr)
 	result = insert_resource(&iomem_resource, res);
 	if (result < 0) {
 		printk(KERN_ERR "%s() failed to claim CCIO bus address space (%08lx,%08lx)\n", 
-			__func__, res->start, res->end);
+			__func__, (unsigned long)res->start, (unsigned long)res->end);
 	}
 }
 
@@ -1551,7 +1550,8 @@ static int __init ccio_probe(struct parisc_device *dev)
 
 	ioc->name = dev->id.hversion == U2_IOA_RUNWAY ? "U2" : "UTurn";
 
-	printk(KERN_INFO "Found %s at 0x%lx\n", ioc->name, dev->hpa.start);
+	printk(KERN_INFO "Found %s at 0x%lx\n", ioc->name,
+		(unsigned long)dev->hpa.start);
 
 	for (i = 0; i < ioc_count; i++) {
 		ioc_p = &(*ioc_p)->next;
diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c
index bb5a1c9..52ae0b1 100644
--- a/drivers/parisc/dino.c
+++ b/drivers/parisc/dino.c
@@ -819,7 +819,9 @@ dino_bridge_init(struct dino_device *dino_dev, const char *name)
 
 		result = ccio_request_resource(dino_dev->hba.dev, &res[i]);
 		if (result < 0) {
-			printk(KERN_ERR "%s: failed to claim PCI Bus address space %d (0x%lx-0x%lx)!\n", name, i, res[i].start, res[i].end);
+			printk(KERN_ERR "%s: failed to claim PCI Bus address "
+			       "space %d (0x%lx-0x%lx)!\n", name, i,
+			       (unsigned long)res[i].start, (unsigned long)res[i].end);
 			return result;
 		}
 	}
@@ -899,7 +901,8 @@ static int __init dino_common_init(struct parisc_device *dev,
 	if (request_resource(&ioport_resource, res) < 0) {
 		printk(KERN_ERR "%s: request I/O Port region failed "
 		       "0x%lx/%lx (hpa 0x%p)\n",
-		       name, res->start, res->end, dino_dev->hba.base_addr);
+		       name, (unsigned long)res->start, (unsigned long)res->end,
+		       dino_dev->hba.base_addr);
 		return 1;
 	}
 
diff --git a/drivers/parisc/eisa.c b/drivers/parisc/eisa.c
index 7891db5..f415fdd 100644
--- a/drivers/parisc/eisa.c
+++ b/drivers/parisc/eisa.c
@@ -314,7 +314,7 @@ static int __init eisa_probe(struct parisc_device *dev)
 	char *name = is_mongoose(dev) ? "Mongoose" : "Wax";
 
 	printk(KERN_INFO "%s EISA Adapter found at 0x%08lx\n", 
-		name, dev->hpa.start);
+		name, (unsigned long)dev->hpa.start);
 
 	eisa_dev.hba.dev = dev;
 	eisa_dev.hba.iommu = ccio_get_iommu(dev);
diff --git a/drivers/parisc/eisa_enumerator.c b/drivers/parisc/eisa_enumerator.c
index 6d8aae0..c709ecc 100644
--- a/drivers/parisc/eisa_enumerator.c
+++ b/drivers/parisc/eisa_enumerator.c
@@ -98,7 +98,7 @@ static int configure_memory(const unsigned char *buf,
 			res->start = mem_parent->start + get_24(buf+len+2);
 			res->end = res->start + get_16(buf+len+5)*1024;
 			res->flags = IORESOURCE_MEM;
-			printk("memory %lx-%lx ", res->start, res->end);
+			printk("memory %lx-%lx ", (unsigned long)res->start, (unsigned long)res->end);
 			result = request_resource(mem_parent, res);
 			if (result < 0) {
 				printk("\n" KERN_ERR "EISA Enumerator: failed to claim EISA Bus address space!\n");
@@ -188,7 +188,7 @@ static int configure_port(const unsigned char *buf, struct resource *io_parent,
 			res->start = get_16(buf+len+1);
 			res->end = get_16(buf+len+1)+(c&HPEE_PORT_SIZE_MASK)+1;
 			res->flags = IORESOURCE_IO;
-			printk("ioports %lx-%lx ", res->start, res->end);
+			printk("ioports %lx-%lx ", (unsigned long)res->start, (unsigned long)res->end);
 			result = request_resource(io_parent, res);
 			if (result < 0) {
 				printk("\n" KERN_ERR "EISA Enumerator: failed to claim EISA Bus address space!\n");
diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c
index 501aaf1..73348c4 100644
--- a/drivers/parisc/iosapic.c
+++ b/drivers/parisc/iosapic.c
@@ -714,7 +714,7 @@ static void iosapic_set_affinity_irq(unsigned int irq,
 	if (dest_cpu < 0)
 		return;
 
-	irq_desc[irq].affinity = cpumask_of_cpu(dest_cpu);
+	cpumask_copy(irq_desc[irq].affinity, cpumask_of(dest_cpu));
 	vi->txn_addr = txn_affinity_addr(irq, dest_cpu);
 
 	spin_lock_irqsave(&iosapic_lock, flags);
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index 454b653..9581d36 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -3,7 +3,7 @@
  *
  *      (c) Copyright 2000 Red Hat Software
  *      (c) Copyright 2000 Helge Deller <hdeller@redhat.com>
- *      (c) Copyright 2001-2005 Helge Deller <deller@gmx.de>
+ *      (c) Copyright 2001-2009 Helge Deller <deller@gmx.de>
  *      (c) Copyright 2001 Randolph Chung <tausq@debian.org>
  *
  *      This program is free software; you can redistribute it and/or modify
@@ -243,13 +243,11 @@ static int __init led_create_procfs(void)
 
 	proc_pdc_root = proc_mkdir("pdc", 0);
 	if (!proc_pdc_root) return -1;
-	proc_pdc_root->owner = THIS_MODULE;
 	ent = create_proc_entry("led", S_IFREG|S_IRUGO|S_IWUSR, proc_pdc_root);
 	if (!ent) return -1;
 	ent->data = (void *)LED_NOLCD; /* LED */
 	ent->read_proc = led_proc_read;
 	ent->write_proc = led_proc_write;
-	ent->owner = THIS_MODULE;
 
 	if (led_type == LED_HASLCD)
 	{
@@ -258,7 +256,6 @@ static int __init led_create_procfs(void)
 		ent->data = (void *)LED_HASLCD; /* LCD */
 		ent->read_proc = led_proc_read;
 		ent->write_proc = led_proc_write;
-		ent->owner = THIS_MODULE;
 	}
 
 	return 0;
@@ -463,9 +460,20 @@ static void led_work_func (struct work_struct *unused)
 	if (likely(led_lanrxtx))  currentleds |= led_get_net_activity();
 	if (likely(led_diskio))   currentleds |= led_get_diskio_activity();
 
-	/* blink all LEDs twice a second if we got an Oops (HPMC) */
-	if (unlikely(oops_in_progress)) 
-		currentleds = (count_HZ<=(HZ/2)) ? 0 : 0xff;
+	/* blink LEDs if we got an Oops (HPMC) */
+	if (unlikely(oops_in_progress)) {
+		if (boot_cpu_data.cpu_type >= pcxl2) {
+			/* newer machines don't have loadavg. LEDs, so we
+			 * let all LEDs blink twice per second instead */
+			currentleds = (count_HZ <= (HZ/2)) ? 0 : 0xff;
+		} else {
+			/* old machines: blink loadavg. LEDs twice per second */
+			if (count_HZ <= (HZ/2))
+				currentleds &= ~(LED4|LED5|LED6|LED7);
+			else
+				currentleds |= (LED4|LED5|LED6|LED7);
+		}
+	}
 
 	if (currentleds != lastleds)
 	{
@@ -511,7 +519,7 @@ static int led_halt(struct notifier_block *nb, unsigned long event, void *buf)
 	
 	/* Cancel the work item and delete the queue */
 	if (led_wq) {
-		cancel_rearming_delayed_workqueue(led_wq, &led_task);
+		cancel_delayed_work_sync(&led_task);
 		destroy_workqueue(led_wq);
 		led_wq = NULL;
 	}
@@ -630,7 +638,7 @@ int lcd_print( const char *str )
 	
 	/* temporarily disable the led work task */
 	if (led_wq)
-		cancel_rearming_delayed_workqueue(led_wq, &led_task);
+		cancel_delayed_work_sync(&led_task);
 
 	/* copy display string to buffer for procfs */
 	strlcpy(lcd_text, str, sizeof(lcd_text));
diff --git a/drivers/pcmcia/pxa2xx_cm_x255.c b/drivers/pcmcia/pxa2xx_cm_x255.c
index 4ed64d8..5143a76 100644
--- a/drivers/pcmcia/pxa2xx_cm_x255.c
+++ b/drivers/pcmcia/pxa2xx_cm_x255.c
@@ -63,7 +63,7 @@ static void cmx255_pcmcia_socket_state(struct soc_pcmcia_socket *skt,
 				       struct pcmcia_state *state)
 {
 	int cd = skt->nr ? GPIO_PCMCIA_S1_CD_VALID : GPIO_PCMCIA_S0_CD_VALID;
-	int rdy = skt->nr ? GPIO_PCMCIA_S0_RDYINT : GPIO_PCMCIA_S1_RDYINT;
+	int rdy = skt->nr ? GPIO_PCMCIA_S1_RDYINT : GPIO_PCMCIA_S0_RDYINT;
 
 	state->detect = !gpio_get_value(cd);
 	state->ready  = !!gpio_get_value(rdy);
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 56002f7..ffe34a1 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -688,22 +688,16 @@ config RTC_DRV_RS5C313
 	help
 	  If you say yes here you get support for the Ricoh RS5C313 RTC chips.
 
-config RTC_DRV_PARISC
-	tristate "PA-RISC firmware RTC support"
-	depends on PARISC
-	help
-	  Say Y or M here to enable RTC support on PA-RISC systems using
-	  firmware calls. If you do not know what you are doing, you should
+config RTC_DRV_GENERIC
+	tristate "Generic RTC support"
+	# Please consider writing a new RTC driver instead of using the generic
+	# RTC abstraction
+	depends on PARISC || M68K || PPC
+	help
+	  Say Y or M here to enable RTC support on systems using the generic
+	  RTC abstraction. If you do not know what you are doing, you should
 	  just say Y.
 
-config RTC_DRV_PPC
-       tristate "PowerPC machine dependent RTC support"
-       depends on PPC
-       help
-	 The PowerPC kernel has machine-specific functions for accessing
-	 the RTC. This exposes that functionality through the generic RTC
-	 class.
-
 config RTC_DRV_PXA
        tristate "PXA27x/PXA3xx"
        depends on ARCH_PXA
@@ -747,4 +741,13 @@ config RTC_DRV_MV
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-mv.
 
+config RTC_DRV_PS3
+	tristate "PS3 RTC"
+	depends on PPC_PS3
+	help
+	  If you say yes here you will get support for the RTC on PS3.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-ps3.
+
 endif # RTC_CLASS
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index e7b0998..6c0639a 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -56,8 +56,7 @@ obj-$(CONFIG_RTC_DRV_PCF8563)	+= rtc-pcf8563.o
 obj-$(CONFIG_RTC_DRV_PCF8583)	+= rtc-pcf8583.o
 obj-$(CONFIG_RTC_DRV_PL030)	+= rtc-pl030.o
 obj-$(CONFIG_RTC_DRV_PL031)	+= rtc-pl031.o
-obj-$(CONFIG_RTC_DRV_PARISC)	+= rtc-parisc.o
-obj-$(CONFIG_RTC_DRV_PPC)	+= rtc-ppc.o
+obj-$(CONFIG_RTC_DRV_GENERIC)	+= rtc-generic.o
 obj-$(CONFIG_RTC_DRV_PXA)	+= rtc-pxa.o
 obj-$(CONFIG_RTC_DRV_R9701)	+= rtc-r9701.o
 obj-$(CONFIG_RTC_DRV_RS5C313)	+= rtc-rs5c313.o
@@ -77,3 +76,4 @@ obj-$(CONFIG_RTC_DRV_VR41XX)	+= rtc-vr41xx.o
 obj-$(CONFIG_RTC_DRV_WM8350)	+= rtc-wm8350.o
 obj-$(CONFIG_RTC_DRV_X1205)	+= rtc-x1205.o
 obj-$(CONFIG_RTC_DRV_PCF50633)	+= rtc-pcf50633.o
+obj-$(CONFIG_RTC_DRV_PS3)	+= rtc-ps3.o
diff --git a/drivers/rtc/rtc-generic.c b/drivers/rtc/rtc-generic.c
new file mode 100644
index 0000000..98322004
--- /dev/null
+++ b/drivers/rtc/rtc-generic.c
@@ -0,0 +1,84 @@
+/* rtc-generic: RTC driver using the generic RTC abstraction
+ *
+ * Copyright (C) 2008 Kyle McMartin <kyle@mcmartin.ca>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/platform_device.h>
+#include <linux/rtc.h>
+
+#include <asm/rtc.h>
+
+/* .read_time callback: read the clock with get_rtc_time() and validate it. */
+static int generic_get_time(struct device *dev, struct rtc_time *tm)
+{
+	unsigned int status = get_rtc_time(tm);
+
+	/* RTC_BATT_BAD in the returned flags is reported as -EOPNOTSUPP;
+	 * otherwise the result of rtc_valid_tm() is handed back. */
+	return (status & RTC_BATT_BAD) ? -EOPNOTSUPP : rtc_valid_tm(tm);
+}
+
+/*
+ * .set_time callback: any negative set_rtc_time() result becomes -EOPNOTSUPP.
+ */
+static int generic_set_time(struct device *dev, struct rtc_time *tm)
+{
+	return set_rtc_time(tm) < 0 ? -EOPNOTSUPP : 0;
+}
+
+static const struct rtc_class_ops generic_rtc_ops = {
+	.set_time	= generic_set_time,	/* wraps set_rtc_time() */
+	.read_time	= generic_get_time,	/* wraps get_rtc_time() */
+};
+
+/* Register an RTC class device for @dev and keep it as the driver data. */
+static int __init generic_rtc_probe(struct platform_device *dev)
+{
+	struct rtc_device *rtc_dev;
+
+	rtc_dev = rtc_device_register("rtc-generic", &dev->dev,
+				      &generic_rtc_ops, THIS_MODULE);
+	if (!IS_ERR(rtc_dev)) {
+		platform_set_drvdata(dev, rtc_dev);
+		return 0;
+	}
+	return PTR_ERR(rtc_dev);	/* registration failed */
+}
+
+/*
+ * Unregister the rtc_device that probe stored as the platform driver data.
+ */
+static int __exit generic_rtc_remove(struct platform_device *dev)
+{
+	rtc_device_unregister(platform_get_drvdata(dev));
+	return 0;
+}
+
+static struct platform_driver generic_rtc_driver = {
+	.remove	= __exit_p(generic_rtc_remove),
+	.driver	= {
+		.owner	= THIS_MODULE,
+		.name	= "rtc-generic",	/* cf. MODULE_ALIAS */
+	},
+};
+
+static int __init generic_rtc_init(void)	/* module_init() hook */
+{
+	return platform_driver_probe(&generic_rtc_driver, generic_rtc_probe);
+}
+
+static void __exit generic_rtc_fini(void)	/* module_exit() hook */
+{
+	platform_driver_unregister(&generic_rtc_driver);
+}
+
+module_init(generic_rtc_init);
+module_exit(generic_rtc_fini);
+
+MODULE_AUTHOR("Kyle McMartin <kyle@mcmartin.ca>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Generic RTC driver");
+MODULE_ALIAS("platform:rtc-generic");
diff --git a/drivers/rtc/rtc-parisc.c b/drivers/rtc/rtc-parisc.c
deleted file mode 100644
index b966f56..0000000
--- a/drivers/rtc/rtc-parisc.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/* rtc-parisc: RTC for HP PA-RISC firmware
- *
- * Copyright (C) 2008 Kyle McMartin <kyle@mcmartin.ca>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/time.h>
-#include <linux/platform_device.h>
-#include <linux/rtc.h>
-
-#include <asm/rtc.h>
-
-static int parisc_get_time(struct device *dev, struct rtc_time *tm)
-{
-	unsigned long ret;
-
-	ret = get_rtc_time(tm);
-
-	if (ret & RTC_BATT_BAD)
-		return -EOPNOTSUPP;
-
-	return rtc_valid_tm(tm);
-}
-
-static int parisc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	if (set_rtc_time(tm) < 0)
-		return -EOPNOTSUPP;
-
-	return 0;
-}
-
-static const struct rtc_class_ops parisc_rtc_ops = {
-	.read_time = parisc_get_time,
-	.set_time = parisc_set_time,
-};
-
-static int __init parisc_rtc_probe(struct platform_device *dev)
-{
-	struct rtc_device *rtc;
-
-	rtc = rtc_device_register("rtc-parisc", &dev->dev, &parisc_rtc_ops,
-				  THIS_MODULE);
-	if (IS_ERR(rtc))
-		return PTR_ERR(rtc);
-
-	platform_set_drvdata(dev, rtc);
-
-	return 0;
-}
-
-static int __exit parisc_rtc_remove(struct platform_device *dev)
-{
-	struct rtc_device *rtc = platform_get_drvdata(dev);
-
-	rtc_device_unregister(rtc);
-
-	return 0;
-}
-
-static struct platform_driver parisc_rtc_driver = {
-	.driver = {
-		.name = "rtc-parisc",
-		.owner = THIS_MODULE,
-	},
-	.probe = parisc_rtc_probe,
-	.remove = __devexit_p(parisc_rtc_remove),
-};
-
-static int __init parisc_rtc_init(void)
-{
-	return platform_driver_probe(&parisc_rtc_driver, parisc_rtc_probe);
-}
-
-static void __exit parisc_rtc_fini(void)
-{
-	platform_driver_unregister(&parisc_rtc_driver);
-}
-
-module_init(parisc_rtc_init);
-module_exit(parisc_rtc_fini);
-
-MODULE_AUTHOR("Kyle McMartin <kyle@mcmartin.ca>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("HP PA-RISC RTC driver");
diff --git a/drivers/rtc/rtc-ppc.c b/drivers/rtc/rtc-ppc.c
deleted file mode 100644
index c8e97e2..0000000
--- a/drivers/rtc/rtc-ppc.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * RTC driver for ppc_md RTC functions
- *
- * © 2007 Red Hat, Inc.
- *
- * Author: David Woodhouse <dwmw2@infradead.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-
-#include <linux/module.h>
-#include <linux/err.h>
-#include <linux/rtc.h>
-#include <linux/platform_device.h>
-#include <asm/machdep.h>
-
-static int ppc_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-	ppc_md.get_rtc_time(tm);
-	return 0;
-}
-
-static int ppc_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	return ppc_md.set_rtc_time(tm);
-}
-
-static const struct rtc_class_ops ppc_rtc_ops = {
-	.set_time = ppc_rtc_set_time,
-	.read_time = ppc_rtc_read_time,
-};
-
-static struct rtc_device *rtc;
-static struct platform_device *ppc_rtc_pdev;
-
-static int __init ppc_rtc_init(void)
-{
-	if (!ppc_md.get_rtc_time || !ppc_md.set_rtc_time)
-		return -ENODEV;
-
-	ppc_rtc_pdev = platform_device_register_simple("ppc-rtc", 0, NULL, 0);
-	if (IS_ERR(ppc_rtc_pdev))
-		return PTR_ERR(ppc_rtc_pdev);
-
-	rtc = rtc_device_register("ppc_md", &ppc_rtc_pdev->dev,
-				  &ppc_rtc_ops, THIS_MODULE);
-	if (IS_ERR(rtc)) {
-		platform_device_unregister(ppc_rtc_pdev);
-		return PTR_ERR(rtc);
-	}
-
-	return 0;
-}
-
-static void __exit ppc_rtc_exit(void)
-{
-	rtc_device_unregister(rtc);
-	platform_device_unregister(ppc_rtc_pdev);
-}
-
-module_init(ppc_rtc_init);
-module_exit(ppc_rtc_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>");
-MODULE_DESCRIPTION("Generic RTC class driver for PowerPC");
diff --git a/drivers/rtc/rtc-ps3.c b/drivers/rtc/rtc-ps3.c
new file mode 100644
index 0000000..968133c
--- /dev/null
+++ b/drivers/rtc/rtc-ps3.c
@@ -0,0 +1,104 @@
+/*
+ * PS3 RTC Driver
+ *
+ * Copyright 2009 Sony Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/rtc.h>
+
+#include <asm/lv1call.h>
+#include <asm/ps3.h>
+
+
+/*
+ * Fetch the RTC value via lv1_get_rtc(); a non-zero result trips BUG_ON().
+ */
+static u64 read_rtc(void)
+{
+	u64 rtc_val, tb_val;	/* tb_val is passed to the call, else unused */
+	int result = lv1_get_rtc(&rtc_val, &tb_val);
+
+	BUG_ON(result);
+	return rtc_val;
+}
+
+static int ps3_get_time(struct device *dev, struct rtc_time *tm)
+{	/* .read_time: read_rtc() plus the stored rtc diff, as a rtc_time */
+	rtc_time_to_tm(read_rtc() + ps3_os_area_get_rtc_diff(), tm);
+	return rtc_valid_tm(tm);
+}
+
+static int ps3_set_time(struct device *dev, struct rtc_time *tm)
+{
+	unsigned long wanted;	/* *tm as converted by rtc_tm_to_time() */
+
+	rtc_tm_to_time(tm, &wanted);
+	ps3_os_area_set_rtc_diff(wanted - read_rtc());	/* store diff only */
+	return 0;
+}
+
+static const struct rtc_class_ops ps3_rtc_ops = {
+	.set_time	= ps3_set_time,	/* updates the rtc diff */
+	.read_time	= ps3_get_time,	/* read_rtc() + rtc diff */
+};
+
+static int __init ps3_rtc_probe(struct platform_device *dev)
+{
+	struct rtc_device *rtc_dev;
+
+	rtc_dev = rtc_device_register("rtc-ps3", &dev->dev,
+				      &ps3_rtc_ops, THIS_MODULE);
+	if (!IS_ERR(rtc_dev)) {
+		platform_set_drvdata(dev, rtc_dev);	/* for ->remove */
+		return 0;
+	}
+	return PTR_ERR(rtc_dev);	/* registration failed */
+}
+
+static int __exit ps3_rtc_remove(struct platform_device *dev)
+{
+	rtc_device_unregister(platform_get_drvdata(dev));	/* set in probe */
+	return 0;	/* unconditionally reports success */
+}
+
+static struct platform_driver ps3_rtc_driver = {
+	.remove	= __exit_p(ps3_rtc_remove),
+	.driver	= {
+		.owner	= THIS_MODULE,
+		.name	= "rtc-ps3",	/* cf. MODULE_ALIAS */
+	},
+};
+
+static int __init ps3_rtc_init(void)	/* module_init() hook */
+{
+	return platform_driver_probe(&ps3_rtc_driver, ps3_rtc_probe);
+}
+
+static void __exit ps3_rtc_fini(void)	/* module_exit() hook */
+{
+	platform_driver_unregister(&ps3_rtc_driver);
+}
+
+module_init(ps3_rtc_init);
+module_exit(ps3_rtc_fini);
+
+MODULE_AUTHOR("Sony Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ps3 RTC driver");
+MODULE_ALIAS("platform:rtc-ps3");
diff --git a/drivers/serial/mcf.c b/drivers/serial/mcf.c
index 56841fe..0eefb07 100644
--- a/drivers/serial/mcf.c
+++ b/drivers/serial/mcf.c
@@ -513,7 +513,7 @@ static int __init mcf_console_setup(struct console *co, char *options)
 	int parity = 'n';
 	int flow = 'n';
 
-	if ((co->index >= 0) && (co->index <= MCF_MAXPORTS))
+	if ((co->index < 0) || (co->index >= MCF_MAXPORTS))
 		co->index = 0;
 	port = &mcf_ports[co->index].port;
 	if (port->membase == 0)
diff --git a/drivers/usb/storage/isd200.c b/drivers/usb/storage/isd200.c
index 882c57b..fdba2f6 100644
--- a/drivers/usb/storage/isd200.c
+++ b/drivers/usb/storage/isd200.c
@@ -46,6 +46,7 @@
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/ata.h>
 #include <linux/hdreg.h>
 #include <linux/scatterlist.h>
 
@@ -328,7 +329,7 @@ struct isd200_config {
 
 struct isd200_info {
 	struct inquiry_data InquiryData;
-	struct hd_driveid *id;
+	u16 *id;
 	struct isd200_config ConfigData;
 	unsigned char *RegsBuf;
 	unsigned char ATARegs[8];
@@ -419,19 +420,19 @@ static void isd200_build_sense(struct us_data *us, struct scsi_cmnd *srb)
 		buf->Flags = UNIT_ATTENTION;
 		buf->AdditionalSenseCode = 0;
 		buf->AdditionalSenseCodeQualifier = 0;
-	} else if(error & MCR_ERR) {
+	} else if (error & ATA_MCR) {
 		buf->ErrorCode = 0x70 | SENSE_ERRCODE_VALID;
 		buf->AdditionalSenseLength = 0xb;
 		buf->Flags =  UNIT_ATTENTION;
 		buf->AdditionalSenseCode = 0;
 		buf->AdditionalSenseCodeQualifier = 0;
-	} else if(error & TRK0_ERR) {
+	} else if (error & ATA_TRK0NF) {
 		buf->ErrorCode = 0x70 | SENSE_ERRCODE_VALID;
 		buf->AdditionalSenseLength = 0xb;
 		buf->Flags =  NOT_READY;
 		buf->AdditionalSenseCode = 0;
 		buf->AdditionalSenseCodeQualifier = 0;
-	} else if(error & ECC_ERR) {
+	} else if (error & ATA_UNC) {
 		buf->ErrorCode = 0x70 | SENSE_ERRCODE_VALID;
 		buf->AdditionalSenseLength = 0xb;
 		buf->Flags =  DATA_PROTECT;
@@ -547,16 +548,16 @@ static int isd200_action( struct us_data *us, int action,
 		ata.generic.ActionSelect = ACTION_SELECT_1|ACTION_SELECT_5;
 		ata.generic.RegisterSelect = REG_DEVICE_HEAD | REG_COMMAND;
 		ata.write.DeviceHeadByte = info->DeviceHead;
-		ata.write.CommandByte = WIN_SRST;
+		ata.write.CommandByte = ATA_CMD_DEV_RESET;
 		isd200_set_srb(info, DMA_NONE, NULL, 0);
 		break;
 
 	case ACTION_IDENTIFY:
 		US_DEBUGP("   isd200_action(IDENTIFY)\n");
 		ata.generic.RegisterSelect = REG_COMMAND;
-		ata.write.CommandByte = WIN_IDENTIFY;
+		ata.write.CommandByte = ATA_CMD_ID_ATA;
 		isd200_set_srb(info, DMA_FROM_DEVICE, info->id,
-		                                sizeof(struct hd_driveid));
+				ATA_ID_WORDS * 2);
 		break;
 
 	default:
@@ -944,22 +945,22 @@ static int isd200_try_enum(struct us_data *us, unsigned char master_slave,
 			break;
 
 		if (!detect) {
-			if (regs[ATA_REG_STATUS_OFFSET] & BUSY_STAT) {
+			if (regs[ATA_REG_STATUS_OFFSET] & ATA_BUSY) {
 				US_DEBUGP("   %s status is still BSY, try again...\n",mstr);
 			} else {
 				US_DEBUGP("   %s status !BSY, continue with next operation\n",mstr);
 				break;
 			}
 		}
-		/* check for BUSY_STAT and */
-		/* WRERR_STAT (workaround ATA Zip drive) and */ 
-		/* ERR_STAT (workaround for Archos CD-ROM) */
+		/* check for ATA_BUSY and */
+		/* ATA_DF (workaround ATA Zip drive) and */
+		/* ATA_ERR (workaround for Archos CD-ROM) */
 		else if (regs[ATA_REG_STATUS_OFFSET] &
-			 (BUSY_STAT | WRERR_STAT | ERR_STAT )) {
+			 (ATA_BUSY | ATA_DF | ATA_ERR)) {
 			US_DEBUGP("   Status indicates it is not ready, try again...\n");
 		}
 		/* check for DRDY, ATA devices set DRDY after SRST */
-		else if (regs[ATA_REG_STATUS_OFFSET] & READY_STAT) {
+		else if (regs[ATA_REG_STATUS_OFFSET] & ATA_DRDY) {
 			US_DEBUGP("   Identified ATA device\n");
 			info->DeviceFlags |= DF_ATA_DEVICE;
 			info->DeviceHead = master_slave;
@@ -1053,103 +1054,50 @@ static int isd200_manual_enum(struct us_data *us)
 	return(retStatus);
 }
 
-static void isd200_fix_driveid (struct hd_driveid *id)
+static void isd200_fix_driveid(u16 *id)
 {
 #ifndef __LITTLE_ENDIAN
 # ifdef __BIG_ENDIAN
 	int i;
-	u16 *stringcast;
-
-	id->config         = __le16_to_cpu(id->config);
-	id->cyls           = __le16_to_cpu(id->cyls);
-	id->reserved2      = __le16_to_cpu(id->reserved2);
-	id->heads          = __le16_to_cpu(id->heads);
-	id->track_bytes    = __le16_to_cpu(id->track_bytes);
-	id->sector_bytes   = __le16_to_cpu(id->sector_bytes);
-	id->sectors        = __le16_to_cpu(id->sectors);
-	id->vendor0        = __le16_to_cpu(id->vendor0);
-	id->vendor1        = __le16_to_cpu(id->vendor1);
-	id->vendor2        = __le16_to_cpu(id->vendor2);
-	stringcast = (u16 *)&id->serial_no[0];
-	for (i = 0; i < (20/2); i++)
-		stringcast[i] = __le16_to_cpu(stringcast[i]);
-	id->buf_type       = __le16_to_cpu(id->buf_type);
-	id->buf_size       = __le16_to_cpu(id->buf_size);
-	id->ecc_bytes      = __le16_to_cpu(id->ecc_bytes);
-	stringcast = (u16 *)&id->fw_rev[0];
-	for (i = 0; i < (8/2); i++)
-		stringcast[i] = __le16_to_cpu(stringcast[i]);
-	stringcast = (u16 *)&id->model[0];
-	for (i = 0; i < (40/2); i++)
-		stringcast[i] = __le16_to_cpu(stringcast[i]);
-	id->dword_io       = __le16_to_cpu(id->dword_io);
-	id->reserved50     = __le16_to_cpu(id->reserved50);
-	id->field_valid    = __le16_to_cpu(id->field_valid);
-	id->cur_cyls       = __le16_to_cpu(id->cur_cyls);
-	id->cur_heads      = __le16_to_cpu(id->cur_heads);
-	id->cur_sectors    = __le16_to_cpu(id->cur_sectors);
-	id->cur_capacity0  = __le16_to_cpu(id->cur_capacity0);
-	id->cur_capacity1  = __le16_to_cpu(id->cur_capacity1);
-	id->lba_capacity   = __le32_to_cpu(id->lba_capacity);
-	id->dma_1word      = __le16_to_cpu(id->dma_1word);
-	id->dma_mword      = __le16_to_cpu(id->dma_mword);
-	id->eide_pio_modes = __le16_to_cpu(id->eide_pio_modes);
-	id->eide_dma_min   = __le16_to_cpu(id->eide_dma_min);
-	id->eide_dma_time  = __le16_to_cpu(id->eide_dma_time);
-	id->eide_pio       = __le16_to_cpu(id->eide_pio);
-	id->eide_pio_iordy = __le16_to_cpu(id->eide_pio_iordy);
-	for (i = 0; i < 2; ++i)
-		id->words69_70[i] = __le16_to_cpu(id->words69_70[i]);
-	for (i = 0; i < 4; ++i)
-		id->words71_74[i] = __le16_to_cpu(id->words71_74[i]);
-	id->queue_depth    = __le16_to_cpu(id->queue_depth);
-	for (i = 0; i < 4; ++i)
-		id->words76_79[i] = __le16_to_cpu(id->words76_79[i]);
-	id->major_rev_num  = __le16_to_cpu(id->major_rev_num);
-	id->minor_rev_num  = __le16_to_cpu(id->minor_rev_num);
-	id->command_set_1  = __le16_to_cpu(id->command_set_1);
-	id->command_set_2  = __le16_to_cpu(id->command_set_2);
-	id->cfsse          = __le16_to_cpu(id->cfsse);
-	id->cfs_enable_1   = __le16_to_cpu(id->cfs_enable_1);
-	id->cfs_enable_2   = __le16_to_cpu(id->cfs_enable_2);
-	id->csf_default    = __le16_to_cpu(id->csf_default);
-	id->dma_ultra      = __le16_to_cpu(id->dma_ultra);
-	id->trseuc         = __le16_to_cpu(id->trseuc);
-	id->trsEuc         = __le16_to_cpu(id->trsEuc);
-	id->CurAPMvalues   = __le16_to_cpu(id->CurAPMvalues);
-	id->mprc           = __le16_to_cpu(id->mprc);
-	id->hw_config      = __le16_to_cpu(id->hw_config);
-	id->acoustic       = __le16_to_cpu(id->acoustic);
-	id->msrqs          = __le16_to_cpu(id->msrqs);
-	id->sxfert         = __le16_to_cpu(id->sxfert);
-	id->sal            = __le16_to_cpu(id->sal);
-	id->spg            = __le32_to_cpu(id->spg);
-	id->lba_capacity_2 = __le64_to_cpu(id->lba_capacity_2);
-	for (i = 0; i < 22; i++)
-		id->words104_125[i]   = __le16_to_cpu(id->words104_125[i]);
-	id->last_lun       = __le16_to_cpu(id->last_lun);
-	id->word127        = __le16_to_cpu(id->word127);
-	id->dlf            = __le16_to_cpu(id->dlf);
-	id->csfo           = __le16_to_cpu(id->csfo);
-	for (i = 0; i < 26; i++)
-		id->words130_155[i] = __le16_to_cpu(id->words130_155[i]);
-	id->word156        = __le16_to_cpu(id->word156);
-	for (i = 0; i < 3; i++)
-		id->words157_159[i] = __le16_to_cpu(id->words157_159[i]);
-	id->cfa_power      = __le16_to_cpu(id->cfa_power);
-	for (i = 0; i < 14; i++)
-		id->words161_175[i] = __le16_to_cpu(id->words161_175[i]);
-	for (i = 0; i < 31; i++)
-		id->words176_205[i] = __le16_to_cpu(id->words176_205[i]);
-	for (i = 0; i < 48; i++)
-		id->words206_254[i] = __le16_to_cpu(id->words206_254[i]);
-	id->integrity_word  = __le16_to_cpu(id->integrity_word);
+
+	for (i = 0; i < ATA_ID_WORDS; i++)
+		id[i] = __le16_to_cpu(id[i]);
 # else
 #  error "Please fix <asm/byteorder.h>"
 # endif
 #endif
 }
 
+static void isd200_dump_driveid(u16 *id)	/* US_DEBUGP dump of id[] */
+{	/* string bytes are fetched as u8 so %x never gets a negative char */
+	US_DEBUGP("   Identify Data Structure:\n");
+	US_DEBUGP("      config = 0x%x\n",	  id[ATA_ID_CONFIG]);
+	US_DEBUGP("      cyls = 0x%x\n",	  id[ATA_ID_CYLS]);
+	US_DEBUGP("      heads = 0x%x\n",	  id[ATA_ID_HEADS]);
+	US_DEBUGP("      track_bytes = 0x%x\n",	  id[4]);
+	US_DEBUGP("      sector_bytes = 0x%x\n",  id[5]);
+	US_DEBUGP("      sectors = 0x%x\n",	  id[ATA_ID_SECTORS]);
+	US_DEBUGP("      serial_no[0] = 0x%x\n",  *(u8 *)&id[ATA_ID_SERNO]);
+	US_DEBUGP("      buf_type = 0x%x\n",	  id[20]);
+	US_DEBUGP("      buf_size = 0x%x\n",	  id[ATA_ID_BUF_SIZE]);
+	US_DEBUGP("      ecc_bytes = 0x%x\n",	  id[22]);
+	US_DEBUGP("      fw_rev[0] = 0x%x\n",	  *(u8 *)&id[ATA_ID_FW_REV]);
+	US_DEBUGP("      model[0] = 0x%x\n",	  *(u8 *)&id[ATA_ID_PROD]);
+	US_DEBUGP("      max_multsect = 0x%x\n",  id[ATA_ID_MAX_MULTSECT] & 0xff);
+	US_DEBUGP("      dword_io = 0x%x\n",	  id[ATA_ID_DWORD_IO]);
+	US_DEBUGP("      capability = 0x%x\n",	  id[ATA_ID_CAPABILITY] >> 8);
+	US_DEBUGP("      tPIO = 0x%x\n",	  id[ATA_ID_OLD_PIO_MODES] >> 8);
+	US_DEBUGP("      tDMA = 0x%x\n",	  id[ATA_ID_OLD_DMA_MODES] >> 8);
+	US_DEBUGP("      field_valid = 0x%x\n",	  id[ATA_ID_FIELD_VALID]);
+	US_DEBUGP("      cur_cyls = 0x%x\n",	  id[ATA_ID_CUR_CYLS]);
+	US_DEBUGP("      cur_heads = 0x%x\n",	  id[ATA_ID_CUR_HEADS]);
+	US_DEBUGP("      cur_sectors = 0x%x\n",	  id[ATA_ID_CUR_SECTORS]);
+	US_DEBUGP("      cur_capacity = 0x%x\n",  ata_id_u32(id, 57));
+	US_DEBUGP("      multsect = 0x%x\n",	  id[ATA_ID_MULTSECT] & 0xff);
+	US_DEBUGP("      lba_capacity = 0x%x\n",  ata_id_u32(id, ATA_ID_LBA_CAPACITY));
+	US_DEBUGP("      command_set_1 = 0x%x\n", id[ATA_ID_COMMAND_SET_1]);
+	US_DEBUGP("      command_set_2 = 0x%x\n", id[ATA_ID_COMMAND_SET_2]);
+}
 
 /**************************************************************************
  * isd200_get_inquiry_data
@@ -1163,7 +1111,7 @@ static int isd200_get_inquiry_data( struct us_data *us )
 {
 	struct isd200_info *info = (struct isd200_info *)us->extra;
 	int retStatus = ISD200_GOOD;
-	struct hd_driveid *id = info->id;
+	u16 *id = info->id;
 
 	US_DEBUGP("Entering isd200_get_inquiry_data\n");
 
@@ -1180,8 +1128,7 @@ static int isd200_get_inquiry_data( struct us_data *us )
 			/* this must be an ATA device */
 			/* perform an ATA Command Identify */
 			transferStatus = isd200_action( us, ACTION_IDENTIFY,
-							id, 
-							sizeof(struct hd_driveid) );
+							id, ATA_ID_WORDS * 2);
 			if (transferStatus != ISD200_TRANSPORT_GOOD) {
 				/* Error issuing ATA Command Identify */
 				US_DEBUGP("   Error issuing ATA Command Identify\n");
@@ -1191,35 +1138,9 @@ static int isd200_get_inquiry_data( struct us_data *us )
 				int i;
 				__be16 *src;
 				__u16 *dest;
-				isd200_fix_driveid(id);
 
-				US_DEBUGP("   Identify Data Structure:\n");
-				US_DEBUGP("      config = 0x%x\n", id->config);
-				US_DEBUGP("      cyls = 0x%x\n", id->cyls);
-				US_DEBUGP("      heads = 0x%x\n", id->heads);
-				US_DEBUGP("      track_bytes = 0x%x\n", id->track_bytes);
-				US_DEBUGP("      sector_bytes = 0x%x\n", id->sector_bytes);
-				US_DEBUGP("      sectors = 0x%x\n", id->sectors);
-				US_DEBUGP("      serial_no[0] = 0x%x\n", id->serial_no[0]);
-				US_DEBUGP("      buf_type = 0x%x\n", id->buf_type);
-				US_DEBUGP("      buf_size = 0x%x\n", id->buf_size);
-				US_DEBUGP("      ecc_bytes = 0x%x\n", id->ecc_bytes);
-				US_DEBUGP("      fw_rev[0] = 0x%x\n", id->fw_rev[0]);
-				US_DEBUGP("      model[0] = 0x%x\n", id->model[0]);
-				US_DEBUGP("      max_multsect = 0x%x\n", id->max_multsect);
-				US_DEBUGP("      dword_io = 0x%x\n", id->dword_io);
-				US_DEBUGP("      capability = 0x%x\n", id->capability);
-				US_DEBUGP("      tPIO = 0x%x\n", id->tPIO);
-				US_DEBUGP("      tDMA = 0x%x\n", id->tDMA);
-				US_DEBUGP("      field_valid = 0x%x\n", id->field_valid);
-				US_DEBUGP("      cur_cyls = 0x%x\n", id->cur_cyls);
-				US_DEBUGP("      cur_heads = 0x%x\n", id->cur_heads);
-				US_DEBUGP("      cur_sectors = 0x%x\n", id->cur_sectors);
-				US_DEBUGP("      cur_capacity = 0x%x\n", (id->cur_capacity1 << 16) + id->cur_capacity0 );
-				US_DEBUGP("      multsect = 0x%x\n", id->multsect);
-				US_DEBUGP("      lba_capacity = 0x%x\n", id->lba_capacity);
-				US_DEBUGP("      command_set_1 = 0x%x\n", id->command_set_1);
-				US_DEBUGP("      command_set_2 = 0x%x\n", id->command_set_2);
+				isd200_fix_driveid(id);
+				isd200_dump_driveid(id);
 
 				memset(&info->InquiryData, 0, sizeof(info->InquiryData));
 
@@ -1229,30 +1150,30 @@ static int isd200_get_inquiry_data( struct us_data *us )
 				/* The length must be at least 36 (5 + 31) */
 				info->InquiryData.AdditionalLength = 0x1F;
 
-				if (id->command_set_1 & COMMANDSET_MEDIA_STATUS) {
+				if (id[ATA_ID_COMMAND_SET_1] & COMMANDSET_MEDIA_STATUS) {
 					/* set the removable bit */
 					info->InquiryData.DeviceTypeModifier = DEVICE_REMOVABLE;
 					info->DeviceFlags |= DF_REMOVABLE_MEDIA;
 				}
 
 				/* Fill in vendor identification fields */
-				src = (__be16*)id->model;
+				src = (__be16 *)&id[ATA_ID_PROD];
 				dest = (__u16*)info->InquiryData.VendorId;
 				for (i=0;i<4;i++)
 					dest[i] = be16_to_cpu(src[i]);
 
-				src = (__be16*)(id->model+8);
+				src = (__be16 *)&id[ATA_ID_PROD + 8/2];
 				dest = (__u16*)info->InquiryData.ProductId;
 				for (i=0;i<8;i++)
 					dest[i] = be16_to_cpu(src[i]);
 
-				src = (__be16*)id->fw_rev;
+				src = (__be16 *)&id[ATA_ID_FW_REV];
 				dest = (__u16*)info->InquiryData.ProductRevisionLevel;
 				for (i=0;i<2;i++)
 					dest[i] = be16_to_cpu(src[i]);
 
 				/* determine if it supports Media Status Notification */
-				if (id->command_set_2 & COMMANDSET_MEDIA_STATUS) {
+				if (id[ATA_ID_COMMAND_SET_2] & COMMANDSET_MEDIA_STATUS) {
 					US_DEBUGP("   Device supports Media Status Notification\n");
 
 					/* Indicate that it is enabled, even though it is not
@@ -1301,7 +1222,7 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
 			      union ata_cdb * ataCdb)
 {
 	struct isd200_info *info = (struct isd200_info *)us->extra;
-	struct hd_driveid *id = info->id;
+	u16 *id = info->id;
 	int sendToTransport = 1;
 	unsigned char sectnum, head;
 	unsigned short cylinder;
@@ -1369,13 +1290,12 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
 
 		US_DEBUGP("   ATA OUT - SCSIOP_READ_CAPACITY\n");
 
-		if (id->capability & CAPABILITY_LBA ) {
-			capacity = id->lba_capacity - 1;
-		} else {
-			capacity = (id->heads *
-				    id->cyls *
-				    id->sectors) - 1;
-		}
+		if (ata_id_has_lba(id))
+			capacity = ata_id_u32(id, ATA_ID_LBA_CAPACITY) - 1;
+		else
+			capacity = (id[ATA_ID_HEADS] * id[ATA_ID_CYLS] *
+				    id[ATA_ID_SECTORS]) - 1;
+
 		readCapacityData.LogicalBlockAddress = cpu_to_be32(capacity);
 		readCapacityData.BytesPerBlock = cpu_to_be32(0x200);
 
@@ -1392,16 +1312,16 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
 		lba = be32_to_cpu(*(__be32 *)&srb->cmnd[2]);
 		blockCount = (unsigned long)srb->cmnd[7]<<8 | (unsigned long)srb->cmnd[8];
 
-		if (id->capability & CAPABILITY_LBA) {
+		if (ata_id_has_lba(id)) {
 			sectnum = (unsigned char)(lba);
 			cylinder = (unsigned short)(lba>>8);
 			head = ATA_ADDRESS_DEVHEAD_LBA_MODE | (unsigned char)(lba>>24 & 0x0F);
 		} else {
-			sectnum = (unsigned char)((lba % id->sectors) + 1);
-			cylinder = (unsigned short)(lba / (id->sectors *
-							   id->heads));
-			head = (unsigned char)((lba / id->sectors) %
-					       id->heads);
+			sectnum = (u8)((lba % id[ATA_ID_SECTORS]) + 1);
+			cylinder = (u16)(lba / (id[ATA_ID_SECTORS] *
+					id[ATA_ID_HEADS]));
+			head = (u8)((lba / id[ATA_ID_SECTORS]) %
+					id[ATA_ID_HEADS]);
 		}
 		ataCdb->generic.SignatureByte0 = info->ConfigData.ATAMajorCommand;
 		ataCdb->generic.SignatureByte1 = info->ConfigData.ATAMinorCommand;
@@ -1415,7 +1335,7 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
 		ataCdb->write.CylinderHighByte = (unsigned char)(cylinder>>8);
 		ataCdb->write.CylinderLowByte = (unsigned char)cylinder;
 		ataCdb->write.DeviceHeadByte = (head | ATA_ADDRESS_DEVHEAD_STD);
-		ataCdb->write.CommandByte = WIN_READ;
+		ataCdb->write.CommandByte = ATA_CMD_PIO_READ;
 		break;
 
 	case WRITE_10:
@@ -1424,14 +1344,16 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
 		lba = be32_to_cpu(*(__be32 *)&srb->cmnd[2]);
 		blockCount = (unsigned long)srb->cmnd[7]<<8 | (unsigned long)srb->cmnd[8];
 
-		if (id->capability & CAPABILITY_LBA) {
+		if (ata_id_has_lba(id)) {
 			sectnum = (unsigned char)(lba);
 			cylinder = (unsigned short)(lba>>8);
 			head = ATA_ADDRESS_DEVHEAD_LBA_MODE | (unsigned char)(lba>>24 & 0x0F);
 		} else {
-			sectnum = (unsigned char)((lba % id->sectors) + 1);
-			cylinder = (unsigned short)(lba / (id->sectors * id->heads));
-			head = (unsigned char)((lba / id->sectors) % id->heads);
+			sectnum = (u8)((lba % id[ATA_ID_SECTORS]) + 1);
+			cylinder = (u16)(lba / (id[ATA_ID_SECTORS] *
+					id[ATA_ID_HEADS]));
+			head = (u8)((lba / id[ATA_ID_SECTORS]) %
+					id[ATA_ID_HEADS]);
 		}
 		ataCdb->generic.SignatureByte0 = info->ConfigData.ATAMajorCommand;
 		ataCdb->generic.SignatureByte1 = info->ConfigData.ATAMinorCommand;
@@ -1445,7 +1367,7 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
 		ataCdb->write.CylinderHighByte = (unsigned char)(cylinder>>8);
 		ataCdb->write.CylinderLowByte = (unsigned char)cylinder;
 		ataCdb->write.DeviceHeadByte = (head | ATA_ADDRESS_DEVHEAD_STD);
-		ataCdb->write.CommandByte = WIN_WRITE;
+		ataCdb->write.CommandByte = ATA_CMD_PIO_WRITE;
 		break;
 
 	case ALLOW_MEDIUM_REMOVAL:
@@ -1459,7 +1381,7 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
 			ataCdb->generic.TransferBlockSize = 1;
 			ataCdb->generic.RegisterSelect = REG_COMMAND;
 			ataCdb->write.CommandByte = (srb->cmnd[4] & 0x1) ?
-				WIN_DOORLOCK : WIN_DOORUNLOCK;
+				ATA_CMD_MEDIA_LOCK : ATA_CMD_MEDIA_UNLOCK;
 			isd200_srb_set_bufflen(srb, 0);
 		} else {
 			US_DEBUGP("   Not removeable media, just report okay\n");
@@ -1539,8 +1461,7 @@ static int isd200_init_info(struct us_data *us)
 	if (!info)
 		retStatus = ISD200_ERROR;
 	else {
-		info->id = (struct hd_driveid *)
-				kzalloc(sizeof(struct hd_driveid), GFP_KERNEL);
+		info->id = kzalloc(ATA_ID_WORDS * 2, GFP_KERNEL);
 		info->RegsBuf = (unsigned char *)
 				kmalloc(sizeof(info->ATARegs), GFP_KERNEL);
 		info->srb.sense_buffer =
diff --git a/fs/Kconfig b/fs/Kconfig
index c0022b1..86b203f 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -176,6 +176,8 @@ source "fs/romfs/Kconfig"
 source "fs/sysv/Kconfig"
 source "fs/ufs/Kconfig"
 
+source "fs/exofs/Kconfig"
+
 endif # MISC_FILESYSTEMS
 
 menuconfig NETWORK_FILESYSTEMS
diff --git a/fs/Makefile b/fs/Makefile
index 055d5237..70b2aed 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -122,3 +122,4 @@ obj-$(CONFIG_DEBUG_FS)		+= debugfs/
 obj-$(CONFIG_OCFS2_FS)		+= ocfs2/
 obj-$(CONFIG_BTRFS_FS)		+= btrfs/
 obj-$(CONFIG_GFS2_FS)           += gfs2/
+obj-$(CONFIG_EXOFS_FS)          += exofs/
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index ff78668..3e87ce4 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -23,7 +23,7 @@
 #include <linux/if.h>
 #include <linux/if_bridge.h>
 #include <linux/slab.h>
-#include <linux/raid/md.h>
+#include <linux/raid/md_u.h>
 #include <linux/kd.h>
 #include <linux/route.h>
 #include <linux/in6.h>
diff --git a/fs/exofs/BUGS b/fs/exofs/BUGS
new file mode 100644
index 0000000..1b2d4c6
--- /dev/null
+++ b/fs/exofs/BUGS
@@ -0,0 +1,3 @@
+- Running out of space may cause a severe problem if the object (and directory
+  entry) were written but writing the inode attributes failed. If the filesystem
+  is then unmounted and remounted, the kernel can loop endlessly doing a readdir.
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
new file mode 100644
index 0000000..cc2d22d
--- /dev/null
+++ b/fs/exofs/Kbuild
@@ -0,0 +1,16 @@
+#
+# Kbuild for the EXOFS module
+#
+# Copyright (C) 2008 Panasas Inc.  All rights reserved.
+#
+# Authors:
+#   Boaz Harrosh <bharrosh@panasas.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2
+#
+# Kbuild - Gets included from the Kernels Makefile and build system
+#
+
+exofs-y := osd.o inode.o file.o symlink.o namei.o dir.o super.o
+obj-$(CONFIG_EXOFS_FS) += exofs.o
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig
new file mode 100644
index 0000000..86194b2
--- /dev/null
+++ b/fs/exofs/Kconfig
@@ -0,0 +1,13 @@
+config EXOFS_FS
+	tristate "exofs: OSD based file system support"
+	depends on SCSI_OSD_ULD
+	help
+	  EXOFS is a file system that uses an OSD storage device
+	  as its backing storage.
+
+# Debugging-related stuff
+config EXOFS_DEBUG
+	bool "Enable debugging"
+	depends on EXOFS_FS
+	help
+	  This option enables EXOFS debug prints.
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
new file mode 100644
index 0000000..b1512c4
--- /dev/null
+++ b/fs/exofs/common.h
@@ -0,0 +1,184 @@
+/*
+ * common.h - Common definitions for both Kernel and user-mode utilities
+ *
+ * Copyright (C) 2005, 2006
+ * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
+ * Copyright (C) 2005, 2006
+ * International Business Machines
+ * Copyright (C) 2008, 2009
+ * Boaz Harrosh <bharrosh@panasas.com>
+ *
+ * Copyrights for code taken from ext2:
+ *     Copyright (C) 1992, 1993, 1994, 1995
+ *     Remy Card (card@masi.ibp.fr)
+ *     Laboratoire MASI - Institut Blaise Pascal
+ *     Universite Pierre et Marie Curie (Paris VI)
+ *     from
+ *     linux/fs/minix/inode.c
+ *     Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ * This file is part of exofs.
+ *
+ * exofs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.  Since it is based on ext2, and the only
+ * valid version of GPL for the Linux kernel is version 2, the only valid
+ * version of GPL for exofs is version 2.
+ *
+ * exofs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with exofs; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef __EXOFS_COM_H__
+#define __EXOFS_COM_H__
+
+#include <linux/types.h>
+
+#include <scsi/osd_attributes.h>
+#include <scsi/osd_initiator.h>
+#include <scsi/osd_sec.h>
+
+/****************************************************************************
+ * Object ID related defines
+ * NOTE: inode# = object ID - EXOFS_OBJ_OFF
+ ****************************************************************************/
+#define EXOFS_MIN_PID   0x10000	/* Smallest partition ID */
+#define EXOFS_OBJ_OFF	0x10000	/* offset for objects */
+#define EXOFS_SUPER_ID	0x10000	/* object ID for on-disk superblock */
+#define EXOFS_ROOT_ID	0x10002	/* object ID for root directory */
+
+/* exofs Application specific page/attribute */
+# define EXOFS_APAGE_FS_DATA	(OSD_APAGE_APP_DEFINED_FIRST + 3)
+# define EXOFS_ATTR_INODE_DATA	1
+
+/*
+ * The maximum number of files we can have is limited by the size of the
+ * inode number.  This is the largest object ID that the file system supports.
+ * Object IDs 0, 1, and 2 are always in use (see above defines).
+ */
+enum {
+	EXOFS_MAX_INO_ID = (sizeof(ino_t) * 8 == 64) ? ULLONG_MAX :
+					(1ULL << (sizeof(ino_t) * 8ULL - 1ULL)),
+	EXOFS_MAX_ID	 = (EXOFS_MAX_INO_ID - 1 - EXOFS_OBJ_OFF),
+};
+
+/****************************************************************************
+ * Misc.
+ ****************************************************************************/
+#define EXOFS_BLKSHIFT	12
+#define EXOFS_BLKSIZE	(1UL << EXOFS_BLKSHIFT)
+
+/****************************************************************************
+ * superblock-related things
+ ****************************************************************************/
+#define EXOFS_SUPER_MAGIC	0x5DF5
+
+/*
+ * The file system control block - stored in an object's data (mainly, the one
+ * with ID EXOFS_SUPER_ID).  This is where the in-memory superblock is stored
+ * on disk.  Right now it just has a magic value, which is basically a sanity
+ * check on our ability to communicate with the object store.
+ */
+struct exofs_fscb {
+	__le64  s_nextid;	/* Highest object ID used */
+	__le32  s_numfiles;	/* Number of files on fs */
+	__le16  s_magic;	/* Magic signature */
+	__le16  s_newfs;	/* Non-zero if this is a new fs */
+};
+
+/****************************************************************************
+ * inode-related things
+ ****************************************************************************/
+#define EXOFS_IDATA		5
+
+/*
+ * The file control block - stored in an object's attributes.  This is where
+ * the in-memory inode is stored on disk.
+ */
+struct exofs_fcb {
+	__le64  i_size;			/* Size of the file */
+	__le16  i_mode;         	/* File mode */
+	__le16  i_links_count;  	/* Links count */
+	__le32  i_uid;          	/* Owner Uid */
+	__le32  i_gid;          	/* Group Id */
+	__le32  i_atime;        	/* Access time */
+	__le32  i_ctime;        	/* Creation time */
+	__le32  i_mtime;        	/* Modification time */
+	__le32  i_flags;        	/* File flags (unused for now)*/
+	__le32  i_generation;   	/* File version (for NFS) */
+	__le32  i_data[EXOFS_IDATA];	/* Short symlink names and device #s */
+};
+
+#define EXOFS_INO_ATTR_SIZE	sizeof(struct exofs_fcb)
+
+/* This is the Attribute the fcb is stored in */
+static const struct __weak osd_attr g_attr_inode_data = ATTR_DEF(
+	EXOFS_APAGE_FS_DATA,
+	EXOFS_ATTR_INODE_DATA,
+	EXOFS_INO_ATTR_SIZE);
+
+/****************************************************************************
+ * dentry-related things
+ ****************************************************************************/
+#define EXOFS_NAME_LEN	255
+
+/*
+ * The on-disk directory entry
+ */
+struct exofs_dir_entry {
+	__le64		inode_no;		/* inode number           */
+	__le16		rec_len;		/* directory entry length */
+	u8		name_len;		/* name length            */
+	u8		file_type;		/* umm...file type        */
+	char		name[EXOFS_NAME_LEN];	/* file name              */
+};
+
+enum {
+	EXOFS_FT_UNKNOWN,
+	EXOFS_FT_REG_FILE,
+	EXOFS_FT_DIR,
+	EXOFS_FT_CHRDEV,
+	EXOFS_FT_BLKDEV,
+	EXOFS_FT_FIFO,
+	EXOFS_FT_SOCK,
+	EXOFS_FT_SYMLINK,
+	EXOFS_FT_MAX
+};
+
+#define EXOFS_DIR_PAD			4
+#define EXOFS_DIR_ROUND			(EXOFS_DIR_PAD - 1)
+#define EXOFS_DIR_REC_LEN(name_len) \
+	(((name_len) + offsetof(struct exofs_dir_entry, name)  + \
+	  EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND)
+
+/*************************
+ * function declarations *
+ *************************/
+/* osd.c                 */
+void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
+			   const struct osd_obj_id *obj);
+
+int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid);
+static inline int exofs_check_ok(struct osd_request *or)
+{
+	return exofs_check_ok_resid(or, NULL, NULL);
+}
+int exofs_sync_op(struct osd_request *or, int timeout, u8 *cred);
+int exofs_async_op(struct osd_request *or,
+	osd_req_done_fn *async_done, void *caller_context, u8 *cred);
+
+int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr);
+
+int osd_req_read_kern(struct osd_request *or,
+	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
+
+int osd_req_write_kern(struct osd_request *or,
+	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
+
+#endif /*ifndef __EXOFS_COM_H__*/
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
new file mode 100644
index 0000000..65b0c8c
--- /dev/null
+++ b/fs/exofs/dir.c
@@ -0,0 +1,672 @@
+/*
+ * Copyright (C) 2005, 2006
+ * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
+ * Copyright (C) 2005, 2006
+ * International Business Machines
+ * Copyright (C) 2008, 2009
+ * Boaz Harrosh <bharrosh@panasas.com>
+ *
+ * Copyrights for code taken from ext2:
+ *     Copyright (C) 1992, 1993, 1994, 1995
+ *     Remy Card (card@masi.ibp.fr)
+ *     Laboratoire MASI - Institut Blaise Pascal
+ *     Universite Pierre et Marie Curie (Paris VI)
+ *     from
+ *     linux/fs/minix/inode.c
+ *     Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ * This file is part of exofs.
+ *
+ * exofs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.  Since it is based on ext2, and the only
+ * valid version of GPL for the Linux kernel is version 2, the only valid
+ * version of GPL for exofs is version 2.
+ *
+ * exofs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with exofs; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "exofs.h"
+
+static inline unsigned exofs_chunk_size(struct inode *inode)
+{
+	return inode->i_sb->s_blocksize;
+}
+
+static inline void exofs_put_page(struct page *page)
+{
+	kunmap(page);
+	page_cache_release(page);
+}
+
+/* Page count of the dir; reads inode->i_size so call under the inode lock */
+static inline unsigned long dir_pages(struct inode *inode)
+{
+	return (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+}
+
+static unsigned exofs_last_byte(struct inode *inode, unsigned long page_nr)
+{
+	loff_t last_byte = inode->i_size;
+
+	last_byte -= page_nr << PAGE_CACHE_SHIFT;
+	if (last_byte > PAGE_CACHE_SIZE)
+		last_byte = PAGE_CACHE_SIZE;
+	return last_byte;
+}
+
+static int exofs_commit_chunk(struct page *page, loff_t pos, unsigned len)
+{
+	struct address_space *mapping = page->mapping;
+	struct inode *dir = mapping->host;
+	int err = 0;
+
+	dir->i_version++;
+
+	if (!PageUptodate(page))
+		SetPageUptodate(page);
+
+	if (pos+len > dir->i_size) {
+		i_size_write(dir, pos+len);
+		mark_inode_dirty(dir);
+	}
+	set_page_dirty(page);
+
+	if (IS_DIRSYNC(dir))
+		err = write_one_page(page, 1);
+	else
+		unlock_page(page);
+
+	return err;
+}
+
+static void exofs_check_page(struct page *page)
+{
+	struct inode *dir = page->mapping->host;
+	unsigned chunk_size = exofs_chunk_size(dir);
+	char *kaddr = page_address(page);
+	unsigned offs, rec_len;
+	unsigned limit = PAGE_CACHE_SIZE;
+	struct exofs_dir_entry *p;
+	char *error;
+
+	/* the last page of the directory may be only partially used */
+	if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
+		limit = dir->i_size & ~PAGE_CACHE_MASK;
+		if (limit & (chunk_size - 1))
+			goto Ebadsize;
+		if (!limit)
+			goto out;
+	}
+	for (offs = 0; offs <= limit - EXOFS_DIR_REC_LEN(1); offs += rec_len) {
+		p = (struct exofs_dir_entry *)(kaddr + offs);
+		rec_len = le16_to_cpu(p->rec_len);
+
+		if (rec_len < EXOFS_DIR_REC_LEN(1))
+			goto Eshort;
+		if (rec_len & 3)
+			goto Ealign;
+		if (rec_len < EXOFS_DIR_REC_LEN(p->name_len))
+			goto Enamelen;
+		if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
+			goto Espan;
+	}
+	if (offs != limit)	/* the walk must stop exactly at the limit */
+		goto Eend;
+out:
+	SetPageChecked(page);
+	return;
+
+Ebadsize:
+	EXOFS_ERR("ERROR [exofs_check_page]: "
+		"size of directory #%lu is not a multiple of chunk size\n",
+		dir->i_ino
+	);
+	goto fail;
+Eshort:
+	error = "rec_len is smaller than minimal";
+	goto bad_entry;
+Ealign:
+	error = "unaligned directory entry";
+	goto bad_entry;
+Enamelen:
+	error = "rec_len is too small for name_len";
+	goto bad_entry;
+Espan:
+	error = "directory entry across blocks";
+	goto bad_entry;
+bad_entry:
+	EXOFS_ERR(
+		"ERROR [exofs_check_page]: bad entry in directory #%lu: %s - "
+		"offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
+		dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
+		_LLU(le64_to_cpu(p->inode_no)),
+		rec_len, p->name_len);
+	goto fail;
+Eend:
+	p = (struct exofs_dir_entry *)(kaddr + offs);
+	EXOFS_ERR("ERROR [exofs_check_page]: "
+		"entry in directory #%lu spans the page boundary, "
+		"offset=%lu, inode=%llu\n",
+		dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
+		_LLU(le64_to_cpu(p->inode_no)));
+fail:
+	SetPageChecked(page);	/* checked, so the walk is not repeated */
+	SetPageError(page);	/* exofs_get_page() tests PageError() */
+}
+
+static struct page *exofs_get_page(struct inode *dir, unsigned long n)
+{
+	struct address_space *mapping = dir->i_mapping;
+	struct page *page = read_mapping_page(mapping, n, NULL);
+
+	if (!IS_ERR(page)) {
+		kmap(page);
+		if (!PageChecked(page))
+			exofs_check_page(page);
+		if (PageError(page))
+			goto fail;
+	}
+	return page;
+
+fail:
+	exofs_put_page(page);
+	return ERR_PTR(-EIO);
+}
+
+static inline int exofs_match(int len, const unsigned char *name,
+					struct exofs_dir_entry *de)
+{
+	if (len != de->name_len)
+		return 0;
+	if (!de->inode_no)
+		return 0;
+	return !memcmp(name, de->name, len);
+}
+
+static inline
+struct exofs_dir_entry *exofs_next_entry(struct exofs_dir_entry *p)
+{
+	return (struct exofs_dir_entry *)((char *)p + le16_to_cpu(p->rec_len));
+}
+
+static inline unsigned
+exofs_validate_entry(char *base, unsigned offset, unsigned mask)
+{
+	struct exofs_dir_entry *de = (struct exofs_dir_entry *)(base + offset);
+	struct exofs_dir_entry *p =
+			(struct exofs_dir_entry *)(base + (offset&mask));
+	while ((char *)p < (char *)de) {
+		if (p->rec_len == 0)
+			break;
+		p = exofs_next_entry(p);
+	}
+	return (char *)p - base;
+}
+
+static unsigned char exofs_filetype_table[EXOFS_FT_MAX] = {
+	[EXOFS_FT_UNKNOWN]	= DT_UNKNOWN,
+	[EXOFS_FT_REG_FILE]	= DT_REG,
+	[EXOFS_FT_DIR]		= DT_DIR,
+	[EXOFS_FT_CHRDEV]	= DT_CHR,
+	[EXOFS_FT_BLKDEV]	= DT_BLK,
+	[EXOFS_FT_FIFO]		= DT_FIFO,
+	[EXOFS_FT_SOCK]		= DT_SOCK,
+	[EXOFS_FT_SYMLINK]	= DT_LNK,
+};
+
+#define S_SHIFT 12
+static unsigned char exofs_type_by_mode[S_IFMT >> S_SHIFT] = {
+	[S_IFREG >> S_SHIFT]	= EXOFS_FT_REG_FILE,
+	[S_IFDIR >> S_SHIFT]	= EXOFS_FT_DIR,
+	[S_IFCHR >> S_SHIFT]	= EXOFS_FT_CHRDEV,
+	[S_IFBLK >> S_SHIFT]	= EXOFS_FT_BLKDEV,
+	[S_IFIFO >> S_SHIFT]	= EXOFS_FT_FIFO,
+	[S_IFSOCK >> S_SHIFT]	= EXOFS_FT_SOCK,
+	[S_IFLNK >> S_SHIFT]	= EXOFS_FT_SYMLINK,
+};
+
+static inline
+void exofs_set_de_type(struct exofs_dir_entry *de, struct inode *inode)
+{
+	mode_t mode = inode->i_mode;
+	de->file_type = exofs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
+}
+
+static int
+exofs_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	loff_t pos = filp->f_pos;
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	unsigned int offset = pos & ~PAGE_CACHE_MASK;
+	unsigned long n = pos >> PAGE_CACHE_SHIFT;
+	unsigned long npages = dir_pages(inode);
+	unsigned chunk_mask = ~(exofs_chunk_size(inode)-1);
+	unsigned char *types = NULL;
+	int need_revalidate = (filp->f_version != inode->i_version);
+
+	if (pos > inode->i_size - EXOFS_DIR_REC_LEN(1))
+		return 0;
+
+	types = exofs_filetype_table;
+
+	for ( ; n < npages; n++, offset = 0) {
+		char *kaddr, *limit;
+		struct exofs_dir_entry *de;
+		struct page *page = exofs_get_page(inode, n);
+
+		if (IS_ERR(page)) {
+			EXOFS_ERR("ERROR: "
+				   "bad page in #%lu\n",
+				   inode->i_ino);
+			filp->f_pos += PAGE_CACHE_SIZE - offset;
+			return PTR_ERR(page);
+		}
+		kaddr = page_address(page);
+		if (unlikely(need_revalidate)) {
+			if (offset) {
+				offset = exofs_validate_entry(kaddr, offset,
+								chunk_mask);
+				filp->f_pos = (n<<PAGE_CACHE_SHIFT) + offset;
+			}
+			filp->f_version = inode->i_version;
+			need_revalidate = 0;
+		}
+		de = (struct exofs_dir_entry *)(kaddr + offset);
+		limit = kaddr + exofs_last_byte(inode, n) -
+							EXOFS_DIR_REC_LEN(1);
+		for (; (char *)de <= limit; de = exofs_next_entry(de)) {
+			if (de->rec_len == 0) {
+				EXOFS_ERR("ERROR: "
+					"zero-length directory entry\n");
+				exofs_put_page(page);
+				return -EIO;
+			}
+			if (de->inode_no) {
+				int over;
+				unsigned char d_type = DT_UNKNOWN;
+
+				if (types && de->file_type < EXOFS_FT_MAX)
+					d_type = types[de->file_type];
+
+				offset = (char *)de - kaddr;
+				over = filldir(dirent, de->name, de->name_len,
+						(n<<PAGE_CACHE_SHIFT) | offset,
+						le64_to_cpu(de->inode_no),
+						d_type);
+				if (over) {
+					exofs_put_page(page);
+					return 0;
+				}
+			}
+			filp->f_pos += le16_to_cpu(de->rec_len);
+		}
+		exofs_put_page(page);
+	}
+
+	return 0;
+}
+
+struct exofs_dir_entry *exofs_find_entry(struct inode *dir,
+			struct dentry *dentry, struct page **res_page)
+{
+	const unsigned char *name = dentry->d_name.name;
+	int namelen = dentry->d_name.len;
+	unsigned reclen = EXOFS_DIR_REC_LEN(namelen);
+	unsigned long start, n;
+	unsigned long npages = dir_pages(dir);
+	struct page *page = NULL;
+	struct exofs_i_info *oi = exofs_i(dir);
+	struct exofs_dir_entry *de;
+
+	*res_page = NULL;	/* defined on every exit, even for an empty dir */
+
+	if (npages == 0)
+		goto out;
+
+	start = oi->i_dir_start_lookup;
+	if (start >= npages)
+		start = 0;
+	n = start;
+	do {
+		char *kaddr;
+		page = exofs_get_page(dir, n);
+		if (!IS_ERR(page)) {
+			kaddr = page_address(page);
+			de = (struct exofs_dir_entry *) kaddr;
+			kaddr += exofs_last_byte(dir, n) - reclen;
+			while ((char *) de <= kaddr) {
+				if (de->rec_len == 0) {
+					EXOFS_ERR(
+						"ERROR: exofs_find_entry: "
+						"zero-length directory entry\n");
+					exofs_put_page(page);
+					goto out;
+				}
+				if (exofs_match(namelen, name, de))
+					goto found;
+				de = exofs_next_entry(de);
+			}
+			exofs_put_page(page);
+		}
+		if (++n >= npages)
+			n = 0;
+	} while (n != start);
+out:
+	return NULL;
+
+found:
+	*res_page = page;	/* caller releases it with exofs_put_page() */
+	oi->i_dir_start_lookup = n;
+	return de;
+}
+
+struct exofs_dir_entry *exofs_dotdot(struct inode *dir, struct page **p)
+{
+	struct page *page = exofs_get_page(dir, 0);
+	struct exofs_dir_entry *de = NULL;
+
+	if (!IS_ERR(page)) {
+		de = exofs_next_entry(
+				(struct exofs_dir_entry *)page_address(page));
+		*p = page;
+	}
+	return de;
+}
+
+ino_t exofs_parent_ino(struct dentry *child)
+{
+	struct page *page;
+	struct exofs_dir_entry *de;
+	ino_t ino;
+
+	de = exofs_dotdot(child->d_inode, &page);
+	if (!de)
+		return 0;
+
+	ino = le64_to_cpu(de->inode_no);
+	exofs_put_page(page);
+	return ino;
+}
+
+ino_t exofs_inode_by_name(struct inode *dir, struct dentry *dentry)
+{
+	ino_t res = 0;
+	struct exofs_dir_entry *de;
+	struct page *page;
+
+	de = exofs_find_entry(dir, dentry, &page);
+	if (de) {
+		res = le64_to_cpu(de->inode_no);
+		exofs_put_page(page);
+	}
+	return res;
+}
+
+int exofs_set_link(struct inode *dir, struct exofs_dir_entry *de,
+			struct page *page, struct inode *inode)
+{
+	loff_t pos = page_offset(page) +
+			(char *) de - (char *) page_address(page);
+	unsigned len = le16_to_cpu(de->rec_len);
+	int err;
+
+	lock_page(page);
+	err = exofs_write_begin(NULL, page->mapping, pos, len,
+				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	if (err)
+		EXOFS_ERR("exofs_set_link: exofs_write_begin FAILD => %d\n",
+			  err);
+
+	de->inode_no = cpu_to_le64(inode->i_ino);
+	exofs_set_de_type(de, inode);
+	if (likely(!err))
+		err = exofs_commit_chunk(page, pos, len);
+	exofs_put_page(page);
+	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+	mark_inode_dirty(dir);
+	return err;
+}
+
+int exofs_add_link(struct dentry *dentry, struct inode *inode)
+{
+	struct inode *dir = dentry->d_parent->d_inode;
+	const unsigned char *name = dentry->d_name.name;
+	int namelen = dentry->d_name.len;
+	unsigned chunk_size = exofs_chunk_size(dir);
+	unsigned reclen = EXOFS_DIR_REC_LEN(namelen);
+	unsigned short rec_len, name_len;
+	struct page *page = NULL;
+	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
+	struct exofs_dir_entry *de;
+	unsigned long npages = dir_pages(dir);
+	unsigned long n;
+	char *kaddr;
+	loff_t pos;
+	int err;
+
+	for (n = 0; n <= npages; n++) {
+		char *dir_end;
+
+		page = exofs_get_page(dir, n);
+		err = PTR_ERR(page);
+		if (IS_ERR(page))
+			goto out;
+		lock_page(page);
+		kaddr = page_address(page);
+		dir_end = kaddr + exofs_last_byte(dir, n);
+		de = (struct exofs_dir_entry *)kaddr;
+		kaddr += PAGE_CACHE_SIZE - reclen;
+		while ((char *)de <= kaddr) {
+			if ((char *)de == dir_end) {
+				name_len = 0;
+				rec_len = chunk_size;
+				de->rec_len = cpu_to_le16(chunk_size);
+				de->inode_no = 0;
+				goto got_it;
+			}
+			if (de->rec_len == 0) {
+				EXOFS_ERR("ERROR: exofs_add_link: "
+					"zero-length directory entry\n");
+				err = -EIO;
+				goto out_unlock;
+			}
+			err = -EEXIST;
+			if (exofs_match(namelen, name, de))
+				goto out_unlock;
+			name_len = EXOFS_DIR_REC_LEN(de->name_len);
+			rec_len = le16_to_cpu(de->rec_len);
+			if (!de->inode_no && rec_len >= reclen)
+				goto got_it;
+			if (rec_len >= name_len + reclen)
+				goto got_it;
+			de = (struct exofs_dir_entry *) ((char *) de + rec_len);
+		}
+		unlock_page(page);
+		exofs_put_page(page);
+	}
+
+	EXOFS_ERR("exofs_add_link: BAD dentry=%p or inode=%p\n", dentry, inode);
+	return -EINVAL;
+
+got_it:
+	pos = page_offset(page) +
+		(char *)de - (char *)page_address(page);
+	err = exofs_write_begin(NULL, page->mapping, pos, rec_len, 0,
+							&page, NULL);
+	if (err)
+		goto out_unlock;
+	if (de->inode_no) {
+		struct exofs_dir_entry *de1 =
+			(struct exofs_dir_entry *)((char *)de + name_len);
+		de1->rec_len = cpu_to_le16(rec_len - name_len);
+		de->rec_len = cpu_to_le16(name_len);
+		de = de1;
+	}
+	de->name_len = namelen;
+	memcpy(de->name, name, namelen);
+	de->inode_no = cpu_to_le64(inode->i_ino);
+	exofs_set_de_type(de, inode);
+	err = exofs_commit_chunk(page, pos, rec_len);
+	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+	mark_inode_dirty(dir);
+	sbi->s_numfiles++;
+
+out_put:
+	exofs_put_page(page);
+out:
+	return err;
+out_unlock:
+	unlock_page(page);
+	goto out_put;
+}
+
+int exofs_delete_entry(struct exofs_dir_entry *dir, struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = mapping->host;
+	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
+	char *kaddr = page_address(page);
+	unsigned from = ((char *)dir - kaddr) & ~(exofs_chunk_size(inode)-1);
+	unsigned to = ((char *)dir - kaddr) + le16_to_cpu(dir->rec_len);
+	loff_t pos;
+	struct exofs_dir_entry *pde = NULL;
+	struct exofs_dir_entry *de = (struct exofs_dir_entry *) (kaddr + from);
+	int err;
+
+	while (de < dir) {
+		if (de->rec_len == 0) {
+			EXOFS_ERR("ERROR: exofs_delete_entry: "
+				"zero-length directory entry\n");
+			err = -EIO;
+			goto out;
+		}
+		pde = de;
+		de = exofs_next_entry(de);
+	}
+	if (pde)
+		from = (char *)pde - (char *)page_address(page);
+	pos = page_offset(page) + from;
+	lock_page(page);
+	err = exofs_write_begin(NULL, page->mapping, pos, to - from, 0,
+							&page, NULL);
+	if (err)
+		EXOFS_ERR("exofs_delete_entry: exofs_write_begin FAILD => %d\n",
+			  err);
+	if (pde)	/* fold the deleted record into its predecessor */
+		pde->rec_len = cpu_to_le16(to - from);
+	dir->inode_no = 0;
+	if (likely(!err))
+		err = exofs_commit_chunk(page, pos, to - from);
+	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+	mark_inode_dirty(inode);
+	sbi->s_numfiles--;
+out:
+	exofs_put_page(page);
+	return err;
+}
+
+/* kept aligned on 4 bytes */
+#define THIS_DIR ".\0\0"
+#define PARENT_DIR "..\0"
+
+int exofs_make_empty(struct inode *inode, struct inode *parent)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct page *page = grab_cache_page(mapping, 0);
+	unsigned chunk_size = exofs_chunk_size(inode);
+	struct exofs_dir_entry *de;
+	int err;
+	void *kaddr;
+
+	if (!page)
+		return -ENOMEM;
+
+	err = exofs_write_begin(NULL, page->mapping, 0, chunk_size, 0,
+							&page, NULL);
+	if (err) {
+		unlock_page(page);
+		goto fail;
+	}
+
+	kaddr = kmap_atomic(page, KM_USER0);
+	de = (struct exofs_dir_entry *)kaddr;	/* first record: "." */
+	de->name_len = 1;
+	de->rec_len = cpu_to_le16(EXOFS_DIR_REC_LEN(1));
+	memcpy(de->name, THIS_DIR, sizeof(THIS_DIR));
+	de->inode_no = cpu_to_le64(inode->i_ino);
+	exofs_set_de_type(de, inode);
+
+	de = (struct exofs_dir_entry *)(kaddr + EXOFS_DIR_REC_LEN(1));
+	de->name_len = 2;	/* second record: "..", fills the chunk */
+	de->rec_len = cpu_to_le16(chunk_size - EXOFS_DIR_REC_LEN(1));
+	de->inode_no = cpu_to_le64(parent->i_ino);
+	memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR));
+	exofs_set_de_type(de, inode);
+	kunmap_atomic(kaddr, KM_USER0);	/* takes the mapped address, not the page */
+	err = exofs_commit_chunk(page, 0, chunk_size);
+fail:
+	page_cache_release(page);
+	return err;
+}
+
+int exofs_empty_dir(struct inode *inode)
+{
+	struct page *page = NULL;
+	unsigned long i, npages = dir_pages(inode);
+
+	for (i = 0; i < npages; i++) {
+		char *kaddr;
+		struct exofs_dir_entry *de;
+		page = exofs_get_page(inode, i);
+
+		if (IS_ERR(page))
+			continue;
+
+		kaddr = page_address(page);
+		de = (struct exofs_dir_entry *)kaddr;
+		kaddr += exofs_last_byte(inode, i) - EXOFS_DIR_REC_LEN(1);
+
+		while ((char *)de <= kaddr) {
+			if (de->rec_len == 0) {
+				EXOFS_ERR("ERROR: exofs_empty_dir: "
+					  "zero-length directory entry, "
+					  "kaddr=%p, de=%p\n", kaddr, de);
+				goto not_empty;
+			}
+			if (de->inode_no != 0) {
+				/* only "." (pointing at us) and ".." may be live */
+				if (de->name[0] != '.')
+					goto not_empty;
+				if (de->name_len > 2)
+					goto not_empty;
+				if (de->name_len < 2) {
+					if (le64_to_cpu(de->inode_no) !=
+					    inode->i_ino)
+						goto not_empty;
+				} else if (de->name[1] != '.')
+					goto not_empty;
+			}
+			de = exofs_next_entry(de);
+		}
+		exofs_put_page(page);
+	}
+	return 1;
+
+not_empty:
+	exofs_put_page(page);
+	return 0;
+}
+
+const struct file_operations exofs_dir_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= generic_read_dir,
+	.readdir	= exofs_readdir,
+};
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
new file mode 100644
index 0000000..0fd4c785
--- /dev/null
+++ b/fs/exofs/exofs.h
@@ -0,0 +1,180 @@
+/*
+ * Copyright (C) 2005, 2006
+ * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
+ * Copyright (C) 2005, 2006
+ * International Business Machines
+ * Copyright (C) 2008, 2009
+ * Boaz Harrosh <bharrosh@panasas.com>
+ *
+ * Copyrights for code taken from ext2:
+ *     Copyright (C) 1992, 1993, 1994, 1995
+ *     Remy Card (card@masi.ibp.fr)
+ *     Laboratoire MASI - Institut Blaise Pascal
+ *     Universite Pierre et Marie Curie (Paris VI)
+ *     from
+ *     linux/fs/minix/inode.c
+ *     Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ * This file is part of exofs.
+ *
+ * exofs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.  Since it is based on ext2, and the only
+ * valid version of GPL for the Linux kernel is version 2, the only valid
+ * version of GPL for exofs is version 2.
+ *
+ * exofs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with exofs; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/fs.h>
+#include <linux/time.h>
+#include "common.h"
+
+#ifndef __EXOFS_H__
+#define __EXOFS_H__
+
+#define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)
+
+#ifdef CONFIG_EXOFS_DEBUG
+#define EXOFS_DBGMSG(fmt, a...) \
+	printk(KERN_NOTICE "exofs @%s:%d: " fmt, __func__, __LINE__, ##a)
+#else
+#define EXOFS_DBGMSG(fmt, a...) \
+	do { if (0) printk(fmt, ##a); } while (0)
+#endif
+
+/* u64 has problems with printk; this casts it to unsigned long long */
+#define _LLU(x) ((unsigned long long)(x))
+
+/*
+ * our extension to the in-memory superblock
+ */
+struct exofs_sb_info {
+	struct osd_dev	*s_dev;			/* returned by get_osd_dev    */
+	osd_id		s_pid;			/* partition ID of file system*/
+	int		s_timeout;		/* timeout for OSD operations */
+	uint64_t	s_nextid;		/* highest object ID used     */
+	uint32_t	s_numfiles;		/* number of files on fs      */
+	spinlock_t	s_next_gen_lock;	/* spinlock for gen # update  */
+	u32		s_next_generation;	/* next gen # to use          */
+	atomic_t	s_curr_pending;		/* number of pending commands */
+	uint8_t		s_cred[OSD_CAP_LEN];	/* all-powerful credential    */
+};
+
+/*
+ * our extension to the in-memory inode
+ */
+struct exofs_i_info {
+	unsigned long  i_flags;            /* various atomic flags            */
+	uint32_t       i_data[EXOFS_IDATA];/*short symlink names and device #s*/
+	uint32_t       i_dir_start_lookup; /* which page to start lookup      */
+	wait_queue_head_t i_wq;            /* wait queue for inode            */
+	uint64_t       i_commit_size;      /* the object's written length     */
+	uint8_t        i_cred[OSD_CAP_LEN];/* all-powerful credential         */
+	struct inode   vfs_inode;          /* normal in-memory inode          */
+};
+
+/*
+ * our inode flags
+ */
+#define OBJ_2BCREATED	0	/* object will be created soon*/
+#define OBJ_CREATED	1	/* object has been created on the osd*/
+
+static inline int obj_2bcreated(struct exofs_i_info *oi)
+{
+	return test_bit(OBJ_2BCREATED, &oi->i_flags);
+}
+
+static inline void set_obj_2bcreated(struct exofs_i_info *oi)
+{
+	set_bit(OBJ_2BCREATED, &oi->i_flags);
+}
+
+static inline int obj_created(struct exofs_i_info *oi)
+{
+	return test_bit(OBJ_CREATED, &oi->i_flags);
+}
+
+static inline void set_obj_created(struct exofs_i_info *oi)
+{
+	set_bit(OBJ_CREATED, &oi->i_flags);
+}
+
+int __exofs_wait_obj_created(struct exofs_i_info *oi);
+static inline int wait_obj_created(struct exofs_i_info *oi)
+{
+	if (likely(obj_created(oi)))
+		return 0;
+
+	return __exofs_wait_obj_created(oi);
+}
+
+/*
+ * get to our inode from the vfs inode
+ */
+static inline struct exofs_i_info *exofs_i(struct inode *inode)
+{
+	return container_of(inode, struct exofs_i_info, vfs_inode);
+}
+
+/*
+ * Maximum count of links to a file
+ */
+#define EXOFS_LINK_MAX           32000
+
+/*************************
+ * function declarations *
+ *************************/
+/* inode.c               */
+void exofs_truncate(struct inode *inode);
+int exofs_setattr(struct dentry *, struct iattr *);
+int exofs_write_begin(struct file *file, struct address_space *mapping,
+		loff_t pos, unsigned len, unsigned flags,
+		struct page **pagep, void **fsdata);
+extern struct inode *exofs_iget(struct super_block *, unsigned long);
+struct inode *exofs_new_inode(struct inode *, int);
+extern int exofs_write_inode(struct inode *, int);
+extern void exofs_delete_inode(struct inode *);
+
+/* dir.c:                */
+int exofs_add_link(struct dentry *, struct inode *);
+ino_t exofs_inode_by_name(struct inode *, struct dentry *);
+int exofs_delete_entry(struct exofs_dir_entry *, struct page *);
+int exofs_make_empty(struct inode *, struct inode *);
+struct exofs_dir_entry *exofs_find_entry(struct inode *, struct dentry *,
+					 struct page **);
+int exofs_empty_dir(struct inode *);
+struct exofs_dir_entry *exofs_dotdot(struct inode *, struct page **);
+ino_t exofs_parent_ino(struct dentry *child);
+int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
+		    struct inode *);
+
+/*********************
+ * operation vectors *
+ *********************/
+/* dir.c:            */
+extern const struct file_operations exofs_dir_operations;
+
+/* file.c            */
+extern const struct inode_operations exofs_file_inode_operations;
+extern const struct file_operations exofs_file_operations;
+
+/* inode.c           */
+extern const struct address_space_operations exofs_aops;
+
+/* namei.c           */
+extern const struct inode_operations exofs_dir_inode_operations;
+extern const struct inode_operations exofs_special_inode_operations;
+
+/* symlink.c         */
+extern const struct inode_operations exofs_symlink_inode_operations;
+extern const struct inode_operations exofs_fast_symlink_inode_operations;
+
+#endif
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
new file mode 100644
index 0000000..6ed7fe4
--- /dev/null
+++ b/fs/exofs/file.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2005, 2006
+ * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
+ * Copyright (C) 2005, 2006
+ * International Business Machines
+ * Copyright (C) 2008, 2009
+ * Boaz Harrosh <bharrosh@panasas.com>
+ *
+ * Copyrights for code taken from ext2:
+ *     Copyright (C) 1992, 1993, 1994, 1995
+ *     Remy Card (card@masi.ibp.fr)
+ *     Laboratoire MASI - Institut Blaise Pascal
+ *     Universite Pierre et Marie Curie (Paris VI)
+ *     from
+ *     linux/fs/minix/inode.c
+ *     Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ * This file is part of exofs.
+ *
+ * exofs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.  Since it is based on ext2, and the only
+ * valid version of GPL for the Linux kernel is version 2, the only valid
+ * version of GPL for exofs is version 2.
+ *
+ * exofs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with exofs; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/buffer_head.h>
+
+#include "exofs.h"
+
+static int exofs_release_file(struct inode *inode, struct file *filp)
+{
+	return 0;
+}
+
+static int exofs_file_fsync(struct file *filp, struct dentry *dentry,
+			    int datasync)
+{
+	int ret;
+	struct address_space *mapping = filp->f_mapping;
+
+	ret = filemap_write_and_wait(mapping);
+	if (ret)
+		return ret;
+
+	/*Note: file_fsync below also calls sync_blockdev, which is a no-op
+	 *      for exofs, but other than that it does sync_inode and
+	 *      sync_superblock which is what we need here.
+	 */
+	return file_fsync(filp, dentry, datasync);
+}
+
+static int exofs_flush(struct file *file, fl_owner_t id)
+{
+	/* Hand the fsync result back to the caller instead of dropping it. */
+	/* TODO: Flush the OSD target */
+	return exofs_file_fsync(file, file->f_path.dentry, 1);
+}
+
+const struct file_operations exofs_file_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= do_sync_read,
+	.write		= do_sync_write,
+	.aio_read	= generic_file_aio_read,
+	.aio_write	= generic_file_aio_write,
+	.mmap		= generic_file_mmap,
+	.open		= generic_file_open,
+	.release	= exofs_release_file,
+	.fsync		= exofs_file_fsync,
+	.flush		= exofs_flush,
+	.splice_read	= generic_file_splice_read,
+	.splice_write	= generic_file_splice_write,
+};
+
+const struct inode_operations exofs_file_inode_operations = {
+	.truncate	= exofs_truncate,
+	.setattr	= exofs_setattr,
+};
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
new file mode 100644
index 0000000..ba8d9fa
--- /dev/null
+++ b/fs/exofs/inode.c
@@ -0,0 +1,1303 @@
+/*
+ * Copyright (C) 2005, 2006
+ * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
+ * Copyright (C) 2005, 2006
+ * International Business Machines
+ * Copyright (C) 2008, 2009
+ * Boaz Harrosh <bharrosh@panasas.com>
+ *
+ * Copyrights for code taken from ext2:
+ *     Copyright (C) 1992, 1993, 1994, 1995
+ *     Remy Card (card@masi.ibp.fr)
+ *     Laboratoire MASI - Institut Blaise Pascal
+ *     Universite Pierre et Marie Curie (Paris VI)
+ *     from
+ *     linux/fs/minix/inode.c
+ *     Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ * This file is part of exofs.
+ *
+ * exofs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.  Since it is based on ext2, and the only
+ * valid version of GPL for the Linux kernel is version 2, the only valid
+ * version of GPL for exofs is version 2.
+ *
+ * exofs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with exofs; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/writeback.h>
+#include <linux/buffer_head.h>
+#include <scsi/scsi_device.h>
+
+#include "exofs.h"
+
+#ifdef CONFIG_EXOFS_DEBUG
+#  define EXOFS_DEBUG_OBJ_ISIZE 1
+#endif
+
+struct page_collect {
+	struct exofs_sb_info *sbi;
+	struct request_queue *req_q;
+	struct inode *inode;
+	unsigned expected_pages;
+
+	struct bio *bio;
+	unsigned nr_pages;
+	unsigned long length;
+	loff_t pg_first; /* keep 64bit also in 32-arches */
+};
+
+static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
+		struct inode *inode)
+{
+	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
+	struct request_queue *req_q = sbi->s_dev->scsi_device->request_queue;
+
+	pcol->sbi = sbi;
+	pcol->req_q = req_q;
+	pcol->inode = inode;
+	pcol->expected_pages = expected_pages;
+
+	pcol->bio = NULL;
+	pcol->nr_pages = 0;
+	pcol->length = 0;
+	pcol->pg_first = -1;
+
+	EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino,
+		     expected_pages);
+}
+
+static void _pcol_reset(struct page_collect *pcol)
+{
+	pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages);
+
+	pcol->bio = NULL;
+	pcol->nr_pages = 0;
+	pcol->length = 0;
+	pcol->pg_first = -1;
+	EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n",
+		     pcol->inode->i_ino, pcol->expected_pages);
+
+	/* this is probably the end of the loop but in writes
+	 * it might not end here. don't be left with nothing
+	 */
+	if (!pcol->expected_pages)
+		pcol->expected_pages = 128;
+}
+
+/* Allocate pcol->bio, halving the page count on each failed attempt. */
+static int pcol_try_alloc(struct page_collect *pcol)
+{
+	int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES);
+
+	for (; pages; pages >>= 1) {
+		pcol->bio = bio_alloc(GFP_KERNEL, pages);
+		if (likely(pcol->bio))
+			return 0;
+	}
+	EXOFS_ERR("Failed to bio_alloc expected_pages=%u\n",
+		  pcol->expected_pages);
+	return -ENOMEM;
+}
+
+static void pcol_free(struct page_collect *pcol)
+{
+	bio_put(pcol->bio);
+	pcol->bio = NULL;
+}
+
+static int pcol_add_page(struct page_collect *pcol, struct page *page,
+			 unsigned len)
+{
+	int added_len = bio_add_pc_page(pcol->req_q, pcol->bio, page, len, 0);
+	if (unlikely(len != added_len))
+		return -ENOMEM;
+
+	++pcol->nr_pages;
+	pcol->length += len;
+	return 0;
+}
+
+static int update_read_page(struct page *page, int ret)
+{
+	if (ret == 0) {
+		/* Everything is OK */
+		SetPageUptodate(page);
+		if (PageError(page))
+			ClearPageError(page);
+	} else if (ret == -EFAULT) {
+		/* In this case we were trying to read something that wasn't on
+		 * disk yet - return a page full of zeroes.  This should be OK,
+		 * because the object should be empty (if there was a write
+		 * before this read, the read would be waiting with the page
+		 * locked */
+		clear_highpage(page);
+
+		SetPageUptodate(page);
+		if (PageError(page))
+			ClearPageError(page);
+		ret = 0; /* recovered error */
+		EXOFS_DBGMSG("recovered read error\n");
+	} else /* Error */
+		SetPageError(page);
+
+	return ret;
+}
+
+static void update_write_page(struct page *page, int ret)
+{
+	if (ret) {
+		mapping_set_error(page->mapping, ret);
+		SetPageError(page);
+	}
+	end_page_writeback(page);
+}
+
+/* Called at the end of reads, to optionally unlock pages and update their
+ * status. Returns 0, or the first error left on a page after recovery.
+ */
+static int __readpages_done(struct osd_request *or, struct page_collect *pcol,
+			    bool do_unlock)
+{
+	struct bio_vec *bvec;
+	int i;
+	u64 resid, good_bytes, length = 0;
+	/* request status; must stay intact while the pages are walked */
+	int req_ret = exofs_check_ok_resid(or, &resid, NULL);
+	int ret = 0;
+
+	osd_end_request(or);
+
+	if (likely(!req_ret))
+		good_bytes = pcol->length;
+	else if (!resid)
+		good_bytes = 0;
+	else
+		good_bytes = pcol->length - resid;
+
+	EXOFS_DBGMSG("readpages_done(0x%lx) good_bytes=0x%llx"
+		     " length=0x%lx nr_pages=%u\n",
+		     pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
+		     pcol->nr_pages);
+
+	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
+		struct page *page = bvec->bv_page;
+		struct inode *inode = page->mapping->host;
+		int page_stat;
+
+		if (inode != pcol->inode)
+			continue; /* osd might add more pages at end */
+
+		page_stat = likely(length < good_bytes) ? 0 : req_ret;
+
+		EXOFS_DBGMSG("    readpages_done(0x%lx, 0x%lx) %s\n",
+			  inode->i_ino, page->index,
+			  page_stat ? "bad_bytes" : "good_bytes");
+
+		/* update_read_page() may recover (-EFAULT => zeroed page) */
+		page_stat = update_read_page(page, page_stat);
+		if (page_stat && !ret)
+			ret = page_stat;
+		if (do_unlock)
+			unlock_page(page);
+		length += bvec->bv_len;
+	}
+
+	pcol_free(pcol);
+	EXOFS_DBGMSG("readpages_done END\n");
+	return ret;
+}
+
+/* callback of async reads */
+static void readpages_done(struct osd_request *or, void *p)
+{
+	struct page_collect *pcol = p;
+
+	__readpages_done(or, pcol, true);
+	atomic_dec(&pcol->sbi->s_curr_pending);
+	kfree(p);
+}
+
+static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
+{
+	struct bio_vec *bvec;
+	int i;
+
+	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
+		struct page *page = bvec->bv_page;
+
+		if (rw == READ)
+			update_read_page(page, ret);
+		else
+			update_write_page(page, ret);
+
+		unlock_page(page);
+	}
+	pcol_free(pcol);
+}
+
+/* Submit the collected pages as one OSD read; a no-op when no bio exists. */
+static int read_exec(struct page_collect *pcol, bool is_sync)
+{
+	struct exofs_i_info *oi = exofs_i(pcol->inode);
+	struct osd_obj_id obj = {pcol->sbi->s_pid,
+					pcol->inode->i_ino + EXOFS_OBJ_OFF};
+	struct osd_request *or = NULL;
+	struct page_collect *pcol_copy = NULL;
+	loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
+	int ret;
+
+	if (!pcol->bio)
+		return 0;
+
+	/* see comment in _readpage() about sync reads */
+	WARN_ON(is_sync && (pcol->nr_pages != 1));
+
+	or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL);
+	if (unlikely(!or)) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	osd_req_read(or, &obj, pcol->bio, i_start);
+	if (is_sync) {
+		exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred);
+		return __readpages_done(or, pcol, false);
+	}
+
+	pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
+	if (!pcol_copy) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	*pcol_copy = *pcol;
+	ret = exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred);
+	if (unlikely(ret))
+		goto err;
+
+	atomic_inc(&pcol->sbi->s_curr_pending);
+	EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
+		  _LLU(obj.id), _LLU(i_start), pcol->length);
+	/* pages ownership was passed to pcol_copy */
+	_pcol_reset(pcol);
+	return 0;
+
+err:
+	if (!is_sync)
+		_unlock_pcol_pages(pcol, ret, READ);
+	else /* sync: pages are left to the caller, only drop the bio */
+		pcol_free(pcol);
+	kfree(pcol_copy);
+	if (or)
+		osd_end_request(or);
+	return ret;
+}
+
+/* readpage_strip is called either directly from readpage() or by the VFS from
+ * within read_cache_pages(), to add one more page to be read. It will try to
+ * collect as many contiguous pages as possible. If a discontinuity is
+ * encountered, or it runs out of resources, it will submit the previous segment
+ * and will start a new collection. Eventually caller must submit the last
+ * segment if present.
+ */
+static int readpage_strip(void *data, struct page *page)
+{
+	struct page_collect *pcol = data;
+	struct inode *inode = pcol->inode;
+	struct exofs_i_info *oi = exofs_i(inode);
+	loff_t i_size = i_size_read(inode);
+	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+	size_t len;
+	int ret;
+
+	/* FIXME: Just for debugging, will be removed */
+	if (PageUptodate(page))
+		EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino,
+			  page->index);
+
+	if (page->index < end_index)
+		len = PAGE_CACHE_SIZE;
+	else if (page->index == end_index)
+		len = i_size & ~PAGE_CACHE_MASK;
+	else
+		len = 0;
+
+	if (!len || !obj_created(oi)) {
+		/* this will be out of bounds, or doesn't exist yet.
+		 * Current page is cleared and the request is split
+		 */
+		clear_highpage(page);
+
+		SetPageUptodate(page);
+		if (PageError(page))
+			ClearPageError(page);
+
+		unlock_page(page);
+		EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page,"
+			     " splitting\n", inode->i_ino, page->index);
+
+		return read_exec(pcol, false);
+	}
+
+try_again:
+
+	if (unlikely(pcol->pg_first == -1)) {
+		pcol->pg_first = page->index;
+	} else if (unlikely((pcol->pg_first + pcol->nr_pages) !=
+		   page->index)) {
+		/* Discontinuity detected, split the request */
+		ret = read_exec(pcol, false);
+		if (unlikely(ret))
+			goto fail;
+		goto try_again;
+	}
+
+	if (!pcol->bio) {
+		ret = pcol_try_alloc(pcol);
+		if (unlikely(ret))
+			goto fail;
+	}
+
+	if (len != PAGE_CACHE_SIZE)
+		zero_user(page, len, PAGE_CACHE_SIZE - len);
+
+	EXOFS_DBGMSG("    readpage_strip(0x%lx, 0x%lx) len=0x%zx\n",
+		     inode->i_ino, page->index, len);
+
+	ret = pcol_add_page(pcol, page, len);
+	if (ret) {
+		EXOFS_DBGMSG("Failed pcol_add_page pages[i]=%p "
+			  "this_len=0x%zx nr_pages=%u length=0x%lx\n",
+			  page, len, pcol->nr_pages, pcol->length);
+
+		/* split the request, and start again with current page */
+		ret = read_exec(pcol, false);
+		if (unlikely(ret))
+			goto fail;
+
+		goto try_again;
+	}
+
+	return 0;
+
+fail:
+	/* SetPageError(page); ??? */
+	unlock_page(page);
+	return ret;
+}
+
+static int exofs_readpages(struct file *file, struct address_space *mapping,
+			   struct list_head *pages, unsigned nr_pages)
+{
+	struct page_collect pcol;
+	int ret;
+
+	_pcol_init(&pcol, nr_pages, mapping->host);
+
+	ret = read_cache_pages(mapping, pages, readpage_strip, &pcol);
+	if (ret) {
+		EXOFS_ERR("read_cache_pages => %d\n", ret);
+		return ret;
+	}
+
+	return read_exec(&pcol, false);
+}
+
+static int _readpage(struct page *page, bool is_sync)
+{
+	struct page_collect pcol;
+	int ret;
+
+	_pcol_init(&pcol, 1, page->mapping->host);
+
+	/* readpage_strip might call read_exec(,async) inside at several places
+	 * but this is safe for is_async=0 since read_exec will not do anything
+	 * when we have a single page.
+	 */
+	ret = readpage_strip(&pcol, page);
+	if (ret) {
+		EXOFS_ERR("_readpage => %d\n", ret);
+		return ret;
+	}
+
+	return read_exec(&pcol, is_sync);
+}
+
+/*
+ * We don't need the file
+ */
+static int exofs_readpage(struct file *file, struct page *page)
+{
+	return _readpage(page, false);
+}
+
+/* Callback for osd_write. All writes are asynchronous */
+static void writepages_done(struct osd_request *or, void *p)
+{
+	struct page_collect *pcol = p;
+	struct bio_vec *bvec;
+	int i;
+	u64 resid;
+	u64  good_bytes;
+	u64  length = 0;
+
+	int ret = exofs_check_ok_resid(or, NULL, &resid);
+
+	osd_end_request(or);
+	atomic_dec(&pcol->sbi->s_curr_pending);
+
+	if (likely(!ret))
+		good_bytes = pcol->length;
+	else if (!resid)
+		good_bytes = 0;
+	else
+		good_bytes = pcol->length - resid;
+
+	EXOFS_DBGMSG("writepages_done(0x%lx) good_bytes=0x%llx"
+		     " length=0x%lx nr_pages=%u\n",
+		     pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
+		     pcol->nr_pages);
+
+	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
+		struct page *page = bvec->bv_page;
+		struct inode *inode = page->mapping->host;
+		int page_stat;
+
+		if (inode != pcol->inode)
+			continue; /* osd might add more pages to a bio */
+
+		if (likely(length < good_bytes))
+			page_stat = 0;
+		else
+			page_stat = ret;
+
+		update_write_page(page, page_stat);
+		unlock_page(page);
+		EXOFS_DBGMSG("    writepages_done(0x%lx, 0x%lx) status=%d\n",
+			     inode->i_ino, page->index, page_stat);
+
+		length += bvec->bv_len;
+	}
+
+	pcol_free(pcol);
+	kfree(pcol);
+	EXOFS_DBGMSG("writepages_done END\n");
+}
+
+static int write_exec(struct page_collect *pcol)
+{
+	struct exofs_i_info *oi = exofs_i(pcol->inode);
+	struct osd_obj_id obj = {pcol->sbi->s_pid,
+					pcol->inode->i_ino + EXOFS_OBJ_OFF};
+	struct osd_request *or = NULL;
+	struct page_collect *pcol_copy = NULL;
+	loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
+	int ret;
+
+	if (!pcol->bio)
+		return 0;
+
+	or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL);
+	if (unlikely(!or)) {
+		EXOFS_ERR("write_exec: Faild to osd_start_request()\n");
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
+	if (!pcol_copy) {
+		EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n");
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	*pcol_copy = *pcol;
+
+	osd_req_write(or, &obj, pcol_copy->bio, i_start);
+	ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred);
+	if (unlikely(ret)) {
+		EXOFS_ERR("write_exec: exofs_async_op() Faild\n");
+		goto err;
+	}
+
+	atomic_inc(&pcol->sbi->s_curr_pending);
+	EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n",
+		  pcol->inode->i_ino, pcol->pg_first, _LLU(i_start),
+		  pcol->length);
+	/* pages ownership was passed to pcol_copy */
+	_pcol_reset(pcol);
+	return 0;
+
+err:
+	_unlock_pcol_pages(pcol, ret, WRITE);
+	kfree(pcol_copy);
+	if (or)
+		osd_end_request(or);
+	return ret;
+}
+
+/* writepage_strip is called either directly from writepage() or by the VFS from
+ * within write_cache_pages(), to add one more page to be written to storage.
+ * It will try to collect as many contiguous pages as possible. If a
+ * discontinuity is encountered or it runs out of resources it will submit the
+ * previous segment and will start a new collection.
+ * Eventually caller must submit the last segment if present.
+ */
+static int writepage_strip(struct page *page,
+			   struct writeback_control *wbc_unused, void *data)
+{
+	struct page_collect *pcol = data;
+	struct inode *inode = pcol->inode;
+	struct exofs_i_info *oi = exofs_i(inode);
+	loff_t i_size = i_size_read(inode);
+	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+	size_t len;
+	int ret;
+
+	BUG_ON(!PageLocked(page));
+
+	ret = wait_obj_created(oi);
+	if (unlikely(ret))
+		goto fail;
+
+	if (page->index < end_index)
+		/* in this case, the page is within the limits of the file */
+		len = PAGE_CACHE_SIZE;
+	else {
+		len = i_size & ~PAGE_CACHE_MASK;
+
+		if (page->index > end_index || !len) {
+			/* in this case, the page is outside the limits
+			 * (truncate in progress)
+			 */
+			ret = write_exec(pcol);
+			if (unlikely(ret))
+				goto fail;
+			if (PageError(page))
+				ClearPageError(page);
+			unlock_page(page);
+			return 0;
+		}
+	}
+
+try_again:
+
+	if (unlikely(pcol->pg_first == -1)) {
+		pcol->pg_first = page->index;
+	} else if (unlikely((pcol->pg_first + pcol->nr_pages) !=
+		   page->index)) {
+		/* Discontinuity detected, split the request */
+		ret = write_exec(pcol);
+		if (unlikely(ret))
+			goto fail;
+		goto try_again;
+	}
+
+	if (!pcol->bio) {
+		ret = pcol_try_alloc(pcol);
+		if (unlikely(ret))
+			goto fail;
+	}
+
+	EXOFS_DBGMSG("    writepage_strip(0x%lx, 0x%lx) len=0x%zx\n",
+		     inode->i_ino, page->index, len);
+
+	ret = pcol_add_page(pcol, page, len);
+	if (unlikely(ret)) {
+		EXOFS_DBGMSG("Failed pcol_add_page "
+			     "nr_pages=%u total_length=0x%lx\n",
+			     pcol->nr_pages, pcol->length);
+
+		/* split the request, next loop will start again */
+		ret = write_exec(pcol);
+		if (unlikely(ret)) {
+			EXOFS_DBGMSG("write_exec faild => %d", ret);
+			goto fail;
+		}
+
+		goto try_again;
+	}
+
+	BUG_ON(PageWriteback(page));
+	set_page_writeback(page);
+
+	return 0;
+
+fail:
+	set_bit(AS_EIO, &page->mapping->flags);
+	unlock_page(page);
+	return ret;
+}
+
+static int exofs_writepages(struct address_space *mapping,
+		       struct writeback_control *wbc)
+{
+	struct page_collect pcol;
+	long start, end, expected_pages;
+	int ret;
+
+	start = wbc->range_start >> PAGE_CACHE_SHIFT;
+	end = (wbc->range_end == LLONG_MAX) ?
+			start + mapping->nrpages :
+			wbc->range_end >> PAGE_CACHE_SHIFT;
+
+	if (start || end)
+		expected_pages = min(end - start + 1, 32L);
+	else
+		expected_pages = mapping->nrpages;
+
+	EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx"
+		     " m->nrpages=%lu start=0x%lx end=0x%lx\n",
+		     mapping->host->i_ino, wbc->range_start, wbc->range_end,
+		     mapping->nrpages, start, end);
+
+	_pcol_init(&pcol, expected_pages, mapping->host);
+
+	ret = write_cache_pages(mapping, wbc, writepage_strip, &pcol);
+	if (ret) {
+		EXOFS_ERR("write_cache_pages => %d\n", ret);
+		return ret;
+	}
+
+	return write_exec(&pcol);
+}
+
+static int exofs_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct page_collect pcol;
+	int ret;
+
+	_pcol_init(&pcol, 1, page->mapping->host);
+
+	ret = writepage_strip(page, NULL, &pcol);
+	if (ret) {
+		EXOFS_ERR("exofs_writepage => %d\n", ret);
+		return ret;
+	}
+
+	return write_exec(&pcol);
+}
+
+int exofs_write_begin(struct file *file, struct address_space *mapping,
+		loff_t pos, unsigned len, unsigned flags,
+		struct page **pagep, void **fsdata)
+{
+	int ret = 0;
+	struct page *page;
+
+	page = *pagep;
+	if (page == NULL) {
+		ret = simple_write_begin(file, mapping, pos, len, flags, pagep,
+					 fsdata);
+		if (ret) {
+			EXOFS_DBGMSG("simple_write_begin faild\n");
+			return ret;
+		}
+
+		page = *pagep;
+	}
+
+	 /* read modify write */
+	if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
+		ret = _readpage(page, true);
+		if (ret) {
+			/*SetPageError was done by _readpage. Is it ok?*/
+			unlock_page(page);
+			EXOFS_DBGMSG("__readpage_filler faild\n");
+		}
+	}
+
+	return ret;
+}
+
+static int exofs_write_begin_export(struct file *file,
+		struct address_space *mapping,
+		loff_t pos, unsigned len, unsigned flags,
+		struct page **pagep, void **fsdata)
+{
+	*pagep = NULL;
+
+	return exofs_write_begin(file, mapping, pos, len, flags, pagep,
+					fsdata);
+}
+
+const struct address_space_operations exofs_aops = {
+	.readpage	= exofs_readpage,
+	.readpages	= exofs_readpages,
+	.writepage	= exofs_writepage,
+	.writepages	= exofs_writepages,
+	.write_begin	= exofs_write_begin_export,
+	.write_end	= simple_write_end,
+};
+
+/******************************************************************************
+ * INODE OPERATIONS
+ *****************************************************************************/
+
+/*
+ * Test whether an inode is a fast symlink.
+ */
+static inline int exofs_inode_is_fast_symlink(struct inode *inode)
+{
+	struct exofs_i_info *oi = exofs_i(inode);
+
+	return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0);
+}
+
+/*
+ * get_block_t - Fill in a buffer_head
+ * An OSD takes care of block allocation so we just fake an allocation by
+ * putting in the inode's sector_t in the buffer_head.
+ * TODO: What about the case of create==0 and @iblock does not exist in the
+ * object?
+ */
+static int exofs_get_block(struct inode *inode, sector_t iblock,
+		    struct buffer_head *bh_result, int create)
+{
+	map_bh(bh_result, inode->i_sb, iblock);
+	return 0;
+}
+
+const struct osd_attr g_attr_logical_length = ATTR_DEF(
+	OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
+
+/*
+ * Truncate a file to the specified size - all we have to do is set the size
+ * attribute.  We make sure the object exists first.
+ */
+void exofs_truncate(struct inode *inode)
+{
+	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
+	struct exofs_i_info *oi = exofs_i(inode);
+	struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
+	struct osd_request *or;
+	struct osd_attr attr;
+	loff_t isize = i_size_read(inode);
+	__be64 newsize;
+	int ret;
+
+	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
+	     || S_ISLNK(inode->i_mode)))
+		return;
+	if (exofs_inode_is_fast_symlink(inode))
+		return;
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+		return;
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+
+	nobh_truncate_page(inode->i_mapping, isize, exofs_get_block);
+
+	/* if we are about to truncate an object, and it hasn't been
+	 * created yet, wait. This is done before the request is started
+	 * so that a failed wait leaves no request to release.
+	 */
+	if (unlikely(wait_obj_created(oi)))
+		goto fail;
+
+	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
+	if (unlikely(!or)) {
+		EXOFS_ERR("ERROR: exofs_truncate: osd_start_request failed\n");
+		goto fail;
+	}
+
+	osd_req_set_attributes(or, &obj);
+	newsize = cpu_to_be64((u64)isize);
+	attr = g_attr_logical_length;
+	attr.val_ptr = &newsize;
+	osd_req_add_set_attr_list(or, &attr, 1);
+
+	ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
+	osd_end_request(or);
+	if (ret)
+		goto fail;
+
+out:
+	mark_inode_dirty(inode);
+	return;
+fail:
+	make_bad_inode(inode);
+	goto out;
+}
+
+/*
+ * Set inode attributes - just call generic functions.
+ */
+int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+	struct inode *inode = dentry->d_inode;
+	int error;
+
+	error = inode_change_ok(inode, iattr);
+	if (error)
+		return error;
+
+	error = inode_setattr(inode, iattr);
+	return error;
+}
+
+/*
+ * Read the inode attribute of the object backing @oi from the OSD and copy it,
+ * unconverted, into @inode.
+ *
+ * @sb:     super block; its s_fs_info supplies the device, partition id and
+ *          timeout used for the request
+ * @oi:     in-memory inode info; the object id is
+ *          oi->vfs_inode.i_ino + EXOFS_OBJ_OFF, and oi->i_cred is filled in
+ *          by exofs_make_credential()
+ * @inode:  destination for EXOFS_INO_ATTR_SIZE bytes of attribute data
+ * @sanity: only written when EXOFS_DEBUG_OBJ_ISIZE is defined; receives the
+ *          logical-length attribute decoded as a big-endian 64-bit value
+ *
+ * Returns -ENOMEM when no request could be started, otherwise the result of
+ * the last exofs_sync_op()/extract_attr_from_req() call (0 on the success
+ * path).
+ */
+static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
+		    struct exofs_fcb *inode, uint64_t *sanity)
+{
+	struct exofs_sb_info *sbi = sb->s_fs_info;
+	struct osd_request *or;
+	struct osd_attr attr;
+	struct osd_obj_id obj = {sbi->s_pid,
+				 oi->vfs_inode.i_ino + EXOFS_OBJ_OFF};
+	int ret;
+
+	exofs_make_credential(oi->i_cred, &obj);
+
+	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
+	if (unlikely(!or)) {
+		EXOFS_ERR("exofs_get_inode: osd_start_request failed.\n");
+		return -ENOMEM;
+	}
+	osd_req_get_attributes(or, &obj);
+
+	/* we need the inode attribute */
+	osd_req_add_get_attr_list(or, &g_attr_inode_data, 1);
+
+#ifdef EXOFS_DEBUG_OBJ_ISIZE
+	/* we get the size attributes to do a sanity check */
+	osd_req_add_get_attr_list(or, &g_attr_logical_length, 1);
+#endif
+
+	ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
+	if (ret)
+		goto out;
+
+	/* use a copy of the template so len/val_ptr can be filled in */
+	attr = g_attr_inode_data;
+	ret = extract_attr_from_req(or, &attr);
+	if (ret) {
+		EXOFS_ERR("exofs_get_inode: extract_attr_from_req failed\n");
+		goto out;
+	}
+
+	/*
+	 * A length mismatch only triggers a warning; the copy below still
+	 * transfers EXOFS_INO_ATTR_SIZE bytes.
+	 */
+	WARN_ON(attr.len != EXOFS_INO_ATTR_SIZE);
+	memcpy(inode, attr.val_ptr, EXOFS_INO_ATTR_SIZE);
+
+#ifdef EXOFS_DEBUG_OBJ_ISIZE
+	attr = g_attr_logical_length;
+	ret = extract_attr_from_req(or, &attr);
+	if (ret) {
+		EXOFS_ERR("ERROR: extract attr from or failed\n");
+		goto out;
+	}
+	*sanity = get_unaligned_be64(attr.val_ptr);
+#endif
+
+out:
+	/* the request is released on both the success and the error path */
+	osd_end_request(or);
+	return ret;
+}
+
+/*
+ * Look up inode number @ino in the inode cache of @sb; when the inode is new,
+ * read it from the OSD and set it up for use.
+ *
+ * Returns the inode, ERR_PTR(-ENOMEM) when iget_locked() fails,
+ * ERR_PTR(-ESTALE) when the stored inode has neither links nor a mode, or
+ * ERR_PTR() of the error reported by exofs_get_inode().
+ */
+struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
+{
+	struct exofs_i_info *oi;
+	struct exofs_fcb fcb;
+	struct inode *inode;
+	uint64_t uninitialized_var(sanity);
+	int ret;
+
+	inode = iget_locked(sb, ino);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+	/* not I_NEW: the inode is returned without touching it */
+	if (!(inode->i_state & I_NEW))
+		return inode;
+	oi = exofs_i(inode);
+
+	/* read the inode from the osd */
+	ret = exofs_get_inode(sb, oi, &fcb, &sanity);
+	if (ret)
+		goto bad_inode;
+
+	/* the object was just read, so mark it as created */
+	init_waitqueue_head(&oi->i_wq);
+	set_obj_created(oi);
+
+	/* copy stuff from on-disk struct to in-memory struct */
+	inode->i_mode = le16_to_cpu(fcb.i_mode);
+	inode->i_uid = le32_to_cpu(fcb.i_uid);
+	inode->i_gid = le32_to_cpu(fcb.i_gid);
+	inode->i_nlink = le16_to_cpu(fcb.i_links_count);
+	inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime);
+	inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime);
+	inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime);
+	/* only seconds are taken from the fcb; nanoseconds are zeroed */
+	inode->i_ctime.tv_nsec =
+		inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0;
+	oi->i_commit_size = le64_to_cpu(fcb.i_size);
+	i_size_write(inode, oi->i_commit_size);
+	inode->i_blkbits = EXOFS_BLKSHIFT;
+	inode->i_generation = le32_to_cpu(fcb.i_generation);
+
+#ifdef EXOFS_DEBUG_OBJ_ISIZE
+	/* debug only: compare the stored size with the object's length */
+	if ((inode->i_size != sanity) &&
+		(!exofs_inode_is_fast_symlink(inode))) {
+		EXOFS_ERR("WARNING: Size of object from inode and "
+			  "attributes differ (%lld != %llu)\n",
+			  inode->i_size, _LLU(sanity));
+	}
+#endif
+
+	oi->i_dir_start_lookup = 0;
+
+	/* no links and no mode: report the inode as stale */
+	if ((inode->i_nlink == 0) && (inode->i_mode == 0)) {
+		ret = -ESTALE;
+		goto bad_inode;
+	}
+
+	/*
+	 * Device nodes keep their device number in i_data: a non-zero
+	 * i_data[0] is decoded with old_decode_dev(), otherwise i_data[1] is
+	 * decoded with new_decode_dev().  All other inodes get a copy of
+	 * i_data.
+	 */
+	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+		if (fcb.i_data[0])
+			inode->i_rdev =
+				old_decode_dev(le32_to_cpu(fcb.i_data[0]));
+		else
+			inode->i_rdev =
+				new_decode_dev(le32_to_cpu(fcb.i_data[1]));
+	} else {
+		memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
+	}
+
+	/* select the operation vectors that match the file type */
+	if (S_ISREG(inode->i_mode)) {
+		inode->i_op = &exofs_file_inode_operations;
+		inode->i_fop = &exofs_file_operations;
+		inode->i_mapping->a_ops = &exofs_aops;
+	} else if (S_ISDIR(inode->i_mode)) {
+		inode->i_op = &exofs_dir_inode_operations;
+		inode->i_fop = &exofs_dir_operations;
+		inode->i_mapping->a_ops = &exofs_aops;
+	} else if (S_ISLNK(inode->i_mode)) {
+		if (exofs_inode_is_fast_symlink(inode))
+			inode->i_op = &exofs_fast_symlink_inode_operations;
+		else {
+			inode->i_op = &exofs_symlink_inode_operations;
+			inode->i_mapping->a_ops = &exofs_aops;
+		}
+	} else {
+		/* everything else goes through init_special_inode() */
+		inode->i_op = &exofs_special_inode_operations;
+		if (fcb.i_data[0])
+			init_special_inode(inode, inode->i_mode,
+			   old_decode_dev(le32_to_cpu(fcb.i_data[0])));
+		else
+			init_special_inode(inode, inode->i_mode,
+			   new_decode_dev(le32_to_cpu(fcb.i_data[1])));
+	}
+
+	unlock_new_inode(inode);
+	return inode;
+
+bad_inode:
+	iget_failed(inode);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Block until the object behind @oi is flagged as created.  Waiting is only
+ * legal for an object whose creation has been started (BUG otherwise).
+ * Returns -EIO when the inode is marked bad, 0 otherwise.
+ */
+int __exofs_wait_obj_created(struct exofs_i_info *oi)
+{
+	if (!obj_created(oi)) {
+		BUG_ON(!obj_2bcreated(oi));
+		wait_event(oi->i_wq, obj_created(oi));
+	}
+
+	if (unlikely(is_bad_inode(&oi->vfs_inode)))
+		return -EIO;
+
+	return 0;
+}
+/*
+ * Completion callback for the asynchronous create request issued by
+ * exofs_new_inode().
+ *
+ * @or: the finished request; it is checked with exofs_check_ok() and released
+ * @p:  the inode whose object was being created
+ *
+ * On success the obj_created flag is set so that other methods know the
+ * object exists on the OSD; on failure the inode is marked bad.  In both
+ * cases the pending-command counter and the inode reference taken by
+ * exofs_new_inode() are dropped and waiters on oi->i_wq are woken.
+ */
+static void create_done(struct osd_request *or, void *p)
+{
+	struct inode *inode = p;
+	struct exofs_i_info *oi = exofs_i(inode);
+	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
+	int ret;
+
+	ret = exofs_check_ok(or);
+	osd_end_request(or);
+	atomic_dec(&sbi->s_curr_pending);
+
+	if (unlikely(ret)) {
+		/* arguments follow the format: object id first, then pid */
+		EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx",
+			  _LLU(inode->i_ino + EXOFS_OBJ_OFF), _LLU(sbi->s_pid));
+		make_bad_inode(inode);
+	} else
+		set_obj_created(oi);
+
+	/*
+	 * NOTE(review): oi is still used by wake_up() after the reference is
+	 * dropped - confirm the inode cannot go away in between.
+	 */
+	atomic_dec(&inode->i_count);
+	wake_up(&oi->i_wq);
+}
+
+/*
+ * Set up a new in-memory inode under directory @dir with file mode @mode and
+ * start the asynchronous creation of its object on the OSD.
+ *
+ * The inode is returned as soon as the create request has been submitted;
+ * create_done() later sets the obj_created flag (or marks the inode bad).
+ *
+ * Returns the inode, ERR_PTR(-ENOMEM) when the inode or the request cannot be
+ * allocated, or ERR_PTR(-EIO) when the request cannot be submitted.
+ */
+struct inode *exofs_new_inode(struct inode *dir, int mode)
+{
+	struct super_block *sb;
+	struct inode *inode;
+	struct exofs_i_info *oi;
+	struct exofs_sb_info *sbi;
+	struct osd_request *or;
+	struct osd_obj_id obj;
+	int ret;
+
+	sb = dir->i_sb;
+	inode = new_inode(sb);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	oi = exofs_i(inode);
+
+	/* flag the object as "to be created" before anyone can wait on it */
+	init_waitqueue_head(&oi->i_wq);
+	set_obj_2bcreated(oi);
+
+	sbi = sb->s_fs_info;
+
+	/* s_nextid changes below, so the super block is marked dirty */
+	sb->s_dirt = 1;
+	inode->i_uid = current->cred->fsuid;
+	/* a set-gid directory passes on its group (and S_ISGID to subdirs) */
+	if (dir->i_mode & S_ISGID) {
+		inode->i_gid = dir->i_gid;
+		if (S_ISDIR(mode))
+			mode |= S_ISGID;
+	} else {
+		inode->i_gid = current->cred->fsgid;
+	}
+	inode->i_mode = mode;
+
+	/*
+	 * NOTE(review): s_nextid is advanced without taking a lock here,
+	 * unlike s_next_generation below - presumably serialized by the
+	 * caller; confirm.
+	 */
+	inode->i_ino = sbi->s_nextid++;
+	inode->i_blkbits = EXOFS_BLKSHIFT;
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+	oi->i_commit_size = inode->i_size = 0;
+	spin_lock(&sbi->s_next_gen_lock);
+	inode->i_generation = sbi->s_next_generation++;
+	spin_unlock(&sbi->s_next_gen_lock);
+	insert_inode_hash(inode);
+
+	mark_inode_dirty(inode);
+
+	obj.partition = sbi->s_pid;
+	obj.id = inode->i_ino + EXOFS_OBJ_OFF;
+	exofs_make_credential(oi->i_cred, &obj);
+
+	/*
+	 * NOTE(review): this error return and the one after exofs_async_op()
+	 * leave the already hashed inode without releasing it - looks like a
+	 * leak; confirm.
+	 */
+	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
+	if (unlikely(!or)) {
+		EXOFS_ERR("exofs_new_inode: osd_start_request failed\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	osd_req_create_object(or, &obj);
+
+	/* increment the refcount so that the inode will still be around when we
+	 * reach the callback
+	 */
+	atomic_inc(&inode->i_count);
+
+	ret = exofs_async_op(or, create_done, inode, oi->i_cred);
+	if (ret) {
+		/* undo the reference taken for the callback */
+		atomic_dec(&inode->i_count);
+		osd_end_request(or);
+		return ERR_PTR(-EIO);
+	}
+	/* matched by the atomic_dec() in create_done() */
+	atomic_inc(&sbi->s_curr_pending);
+
+	return inode;
+}
+
+/*
+ * Context passed to updatei_done(), the completion callback used by
+ * exofs_update_inode() for asynchronous updates.
+ */
+struct updatei_args {
+	struct exofs_sb_info	*sbi;	/* its s_curr_pending is decremented */
+	struct exofs_fcb	fcb;	/* inode image sent as attribute value */
+};
+
+/*
+ * Completion callback for an asynchronous exofs_update_inode(): release the
+ * request, drop the pending-command count and free the updatei_args that
+ * was passed in as @p.
+ */
+static void updatei_done(struct osd_request *or, void *p)
+{
+	struct updatei_args *ctx = p;
+	struct exofs_sb_info *sbi = ctx->sbi;
+
+	osd_end_request(or);
+	atomic_dec(&sbi->s_curr_pending);
+	kfree(ctx);
+}
+
+/*
+ * Write the inode to the OSD.  The in-memory fields are packed into an
+ * exofs_fcb and sent as the inode-data attribute of the object, synchronously
+ * when @do_sync is non-zero and asynchronously otherwise.
+ *
+ * Returns -ENOMEM when the argument block or the request cannot be allocated,
+ * otherwise the result of exofs_sync_op()/exofs_async_op().
+ */
+static int exofs_update_inode(struct inode *inode, int do_sync)
+{
+	struct exofs_i_info *oi = exofs_i(inode);
+	struct super_block *sb = inode->i_sb;
+	struct exofs_sb_info *sbi = sb->s_fs_info;
+	struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
+	struct osd_request *or;
+	struct osd_attr attr;
+	struct exofs_fcb *fcb;
+	struct updatei_args *args;
+	int ret;
+
+	/* heap allocated because the async callback outlives this function */
+	args = kzalloc(sizeof(*args), GFP_KERNEL);
+	if (!args)
+		return -ENOMEM;
+
+	fcb = &args->fcb;
+
+	/* convert the in-memory inode to its little-endian stored form */
+	fcb->i_mode = cpu_to_le16(inode->i_mode);
+	fcb->i_uid = cpu_to_le32(inode->i_uid);
+	fcb->i_gid = cpu_to_le32(inode->i_gid);
+	fcb->i_links_count = cpu_to_le16(inode->i_nlink);
+	fcb->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
+	fcb->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
+	fcb->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
+	oi->i_commit_size = i_size_read(inode);
+	fcb->i_size = cpu_to_le64(oi->i_commit_size);
+	fcb->i_generation = cpu_to_le32(inode->i_generation);
+
+	/*
+	 * Device nodes store their device number in i_data: old encoding in
+	 * i_data[0] when it fits, otherwise new encoding in i_data[1] with
+	 * i_data[0] left at zero.  exofs_iget() reads it back the same way.
+	 */
+	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+		if (old_valid_dev(inode->i_rdev)) {
+			fcb->i_data[0] =
+				cpu_to_le32(old_encode_dev(inode->i_rdev));
+			fcb->i_data[1] = 0;
+		} else {
+			fcb->i_data[0] = 0;
+			fcb->i_data[1] =
+				cpu_to_le32(new_encode_dev(inode->i_rdev));
+			fcb->i_data[2] = 0;
+		}
+	} else
+		memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));
+
+	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
+	if (unlikely(!or)) {
+		EXOFS_ERR("exofs_update_inode: osd_start_request failed.\n");
+		ret = -ENOMEM;
+		goto free_args;
+	}
+
+	osd_req_set_attributes(or, &obj);
+
+	/* copy the attribute template and point it at our fcb */
+	attr = g_attr_inode_data;
+	attr.val_ptr = fcb;
+	osd_req_add_set_attr_list(or, &attr, 1);
+
+	/*
+	 * The object must exist before its attributes can be set.
+	 * NOTE(review): the wait condition is obj_created() only; if creation
+	 * fails and the flag is never set this looks like it waits forever -
+	 * confirm.
+	 */
+	if (!obj_created(oi)) {
+		EXOFS_DBGMSG("!obj_created\n");
+		BUG_ON(!obj_2bcreated(oi));
+		wait_event(oi->i_wq, obj_created(oi));
+		EXOFS_DBGMSG("wait_event done\n");
+	}
+
+	if (do_sync) {
+		ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
+		osd_end_request(or);
+		goto free_args;
+	} else {
+		args->sbi = sbi;
+
+		ret = exofs_async_op(or, updatei_done, args, oi->i_cred);
+		if (ret) {
+			osd_end_request(or);
+			goto free_args;
+		}
+		/* matched by the atomic_dec() in updatei_done() */
+		atomic_inc(&sbi->s_curr_pending);
+		goto out; /* deallocation in updatei_done */
+	}
+
+free_args:
+	kfree(args);
+out:
+	EXOFS_DBGMSG("ret=>%d\n", ret);
+	return ret;
+}
+
+/*
+ * Write @inode to the OSD by forwarding to exofs_update_inode(); @wait is
+ * passed on as its do_sync flag.  Returns what exofs_update_inode() returns.
+ */
+int exofs_write_inode(struct inode *inode, int wait)
+{
+	return exofs_update_inode(inode, wait);
+}
+
+/*
+ * Completion callback for the remove request issued by exofs_delete_inode():
+ * release the request and drop the pending-command count of the
+ * exofs_sb_info passed in as @p.
+ */
+static void delete_done(struct osd_request *or, void *p)
+{
+	struct exofs_sb_info *sbi = p;
+
+	osd_end_request(or);
+	atomic_dec(&sbi->s_curr_pending);
+}
+
+/*
+ * Called when the refcount of an inode reaches zero.  We remove the object
+ * from the OSD here.  We make sure the object was created before we try and
+ * delete it.
+ *
+ * Bad inodes are only cleared; nothing is sent to the OSD for them.  Failures
+ * to start or submit the remove request are logged and otherwise ignored.
+ */
+void exofs_delete_inode(struct inode *inode)
+{
+	struct exofs_i_info *oi = exofs_i(inode);
+	struct super_block *sb = inode->i_sb;
+	struct exofs_sb_info *sbi = sb->s_fs_info;
+	struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
+	struct osd_request *or;
+	int ret;
+
+	truncate_inode_pages(&inode->i_data, 0);
+
+	if (is_bad_inode(inode))
+		goto no_delete;
+
+	/* the result of this final inode update is not checked */
+	mark_inode_dirty(inode);
+	exofs_update_inode(inode, inode_needs_sync(inode));
+
+	inode->i_size = 0;
+	if (inode->i_blocks)
+		exofs_truncate(inode);
+
+	clear_inode(inode);
+
+	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
+	if (unlikely(!or)) {
+		EXOFS_ERR("exofs_delete_inode: osd_start_request failed\n");
+		return;
+	}
+
+	osd_req_remove_object(or, &obj);
+
+	/* if we are deleting an obj that hasn't been created yet, wait */
+	if (!obj_created(oi)) {
+		BUG_ON(!obj_2bcreated(oi));
+		wait_event(oi->i_wq, obj_created(oi));
+	}
+
+	ret = exofs_async_op(or, delete_done, sbi, oi->i_cred);
+	if (ret) {
+		EXOFS_ERR(
+		       "ERROR: @exofs_delete_inode exofs_async_op failed\n");
+		osd_end_request(or);
+		return;
+	}
+	/* matched by the atomic_dec() in delete_done() */
+	atomic_inc(&sbi->s_curr_pending);
+
+	return;
+
+no_delete:
+	clear_inode(inode);
+}
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
new file mode 100644
index 0000000..77fdd76
--- /dev/null
+++ b/fs/exofs/namei.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright (C) 2005, 2006
+ * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
+ * Copyright (C) 2005, 2006
+ * International Business Machines
+ * Copyright (C) 2008, 2009
+ * Boaz Harrosh <bharrosh@panasas.com>
+ *
+ * Copyrights for code taken from ext2:
+ *     Copyright (C) 1992, 1993, 1994, 1995
+ *     Remy Card (card@masi.ibp.fr)
+ *     Laboratoire MASI - Institut Blaise Pascal
+ *     Universite Pierre et Marie Curie (Paris VI)
+ *     from
+ *     linux/fs/minix/inode.c
+ *     Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ * This file is part of exofs.
+ *
+ * exofs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.  Since it is based on ext2, and the only
+ * valid version of GPL for the Linux kernel is version 2, the only valid
+ * version of GPL for exofs is version 2.
+ *
+ * exofs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with exofs; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "exofs.h"
+
+/*
+ * Link @inode into the directory under the name of @dentry.  On success the
+ * dentry is instantiated and 0 is returned.  On failure the inode loses one
+ * link and one reference, and the exofs_add_link() error is returned.
+ */
+static inline int exofs_add_nondir(struct dentry *dentry, struct inode *inode)
+{
+	int rc = exofs_add_link(dentry, inode);
+
+	if (rc) {
+		inode_dec_link_count(inode);
+		iput(inode);
+		return rc;
+	}
+
+	d_instantiate(dentry, inode);
+	return 0;
+}
+
+/*
+ * Look up the name of @dentry in directory @dir.  Names longer than
+ * EXOFS_NAME_LEN yield ERR_PTR(-ENAMETOOLONG).  When the name resolves to an
+ * inode number the inode is fetched with exofs_iget() (errors are passed
+ * on); otherwise a NULL inode is spliced in.
+ */
+static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry,
+				   struct nameidata *nd)
+{
+	struct inode *found = NULL;
+	ino_t objno;
+
+	if (dentry->d_name.len > EXOFS_NAME_LEN)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	objno = exofs_inode_by_name(dir, dentry);
+	if (objno != 0) {
+		found = exofs_iget(dir->i_sb, objno);
+		if (IS_ERR(found))
+			return ERR_CAST(found);
+	}
+
+	return d_splice_alias(found, dentry);
+}
+
+/*
+ * Create a regular file named by @dentry in @dir.  A new inode is allocated,
+ * given the regular-file operation vectors, marked dirty and linked into the
+ * directory.  Returns the exofs_new_inode() error or the result of
+ * exofs_add_nondir().
+ */
+static int exofs_create(struct inode *dir, struct dentry *dentry, int mode,
+			 struct nameidata *nd)
+{
+	struct inode *file = exofs_new_inode(dir, mode);
+
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	file->i_op = &exofs_file_inode_operations;
+	file->i_fop = &exofs_file_operations;
+	file->i_mapping->a_ops = &exofs_aops;
+	mark_inode_dirty(file);
+
+	return exofs_add_nondir(dentry, file);
+}
+
+/*
+ * Create a special file named by @dentry in @dir for device @rdev.  Returns
+ * -EINVAL when @rdev fails new_valid_dev(), the exofs_new_inode() error, or
+ * the result of exofs_add_nondir().
+ */
+static int exofs_mknod(struct inode *dir, struct dentry *dentry, int mode,
+		       dev_t rdev)
+{
+	struct inode *node;
+
+	if (!new_valid_dev(rdev))
+		return -EINVAL;
+
+	node = exofs_new_inode(dir, mode);
+	if (IS_ERR(node))
+		return PTR_ERR(node);
+
+	init_special_inode(node, node->i_mode, rdev);
+	mark_inode_dirty(node);
+
+	return exofs_add_nondir(dentry, node);
+}
+
+/*
+ * Create a symbolic link named by @dentry in @dir that points to @symname.
+ *
+ * A target (including its terminating NUL) that fits in oi->i_data is stored
+ * there as a fast symlink; a longer one is written with page_symlink().
+ * Targets larger than the block size are rejected with -ENAMETOOLONG.
+ */
+static int exofs_symlink(struct inode *dir, struct dentry *dentry,
+			  const char *symname)
+{
+	struct super_block *sb = dir->i_sb;
+	unsigned len = strlen(symname) + 1;
+	struct inode *link;
+	struct exofs_i_info *oi;
+	int rc;
+
+	if (len > sb->s_blocksize)
+		return -ENAMETOOLONG;
+
+	link = exofs_new_inode(dir, S_IFLNK | S_IRWXUGO);
+	if (IS_ERR(link))
+		return PTR_ERR(link);
+
+	oi = exofs_i(link);
+	if (len <= sizeof(oi->i_data)) {
+		/* fast symlink */
+		link->i_op = &exofs_fast_symlink_inode_operations;
+		memcpy(oi->i_data, symname, len);
+		link->i_size = len - 1;
+	} else {
+		/* slow symlink */
+		link->i_op = &exofs_symlink_inode_operations;
+		link->i_mapping->a_ops = &exofs_aops;
+		memset(oi->i_data, 0, sizeof(oi->i_data));
+
+		rc = page_symlink(link, symname, len);
+		if (rc) {
+			/* give the half-built inode back */
+			inode_dec_link_count(link);
+			iput(link);
+			return rc;
+		}
+	}
+	mark_inode_dirty(link);
+
+	return exofs_add_nondir(dentry, link);
+}
+
+/*
+ * Create a hard link: add the name of @dentry in @dir for the inode behind
+ * @old_dentry.  Returns -EMLINK when the inode already has EXOFS_LINK_MAX
+ * links, otherwise the result of exofs_add_nondir().
+ */
+static int exofs_link(struct dentry *old_dentry, struct inode *dir,
+		struct dentry *dentry)
+{
+	struct inode *inode = old_dentry->d_inode;
+
+	if (inode->i_nlink >= EXOFS_LINK_MAX)
+		return -EMLINK;
+
+	inode->i_ctime = CURRENT_TIME;
+	/*
+	 * Take the link and the reference up front; exofs_add_nondir() drops
+	 * both again if adding the directory entry fails.
+	 */
+	inode_inc_link_count(inode);
+	atomic_inc(&inode->i_count);
+
+	return exofs_add_nondir(dentry, inode);
+}
+
+/*
+ * Create a directory named by @dentry in @dir.
+ *
+ * Returns 0 on success, -EMLINK when @dir already has EXOFS_LINK_MAX links,
+ * or the error from exofs_new_inode(), exofs_make_empty() or
+ * exofs_add_link().  On failure every link count taken here is dropped again.
+ */
+static int exofs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+	struct inode *inode;
+	int err = -EMLINK;
+
+	if (dir->i_nlink >= EXOFS_LINK_MAX)
+		goto out;
+
+	/* the parent gains a link; undone at out_dir on failure */
+	inode_inc_link_count(dir);
+
+	inode = exofs_new_inode(dir, S_IFDIR | mode);
+	err = PTR_ERR(inode);
+	if (IS_ERR(inode))
+		goto out_dir;
+
+	inode->i_op = &exofs_dir_inode_operations;
+	inode->i_fop = &exofs_dir_operations;
+	inode->i_mapping->a_ops = &exofs_aops;
+
+	/* second link on the new directory; out_fail drops two links */
+	inode_inc_link_count(inode);
+
+	err = exofs_make_empty(inode, dir);
+	if (err)
+		goto out_fail;
+
+	err = exofs_add_link(dentry, inode);
+	if (err)
+		goto out_fail;
+
+	d_instantiate(dentry, inode);
+out:
+	return err;
+
+out_fail:
+	inode_dec_link_count(inode);
+	inode_dec_link_count(inode);
+	iput(inode);
+out_dir:
+	inode_dec_link_count(dir);
+	goto out;
+}
+
+/*
+ * Remove the directory entry of @dentry from @dir and drop one link of its
+ * inode.  Returns -ENOENT when the entry cannot be found, the
+ * exofs_delete_entry() error when removal fails, and 0 otherwise.
+ */
+static int exofs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *victim = dentry->d_inode;
+	struct exofs_dir_entry *entry;
+	struct page *page;
+	int rc;
+
+	entry = exofs_find_entry(dir, dentry, &page);
+	if (!entry)
+		return -ENOENT;
+
+	rc = exofs_delete_entry(entry, page);
+	if (rc)
+		return rc;
+
+	victim->i_ctime = dir->i_ctime;
+	inode_dec_link_count(victim);
+
+	return 0;
+}
+
+/*
+ * Remove the directory behind @dentry from @dir.  Returns -ENOTEMPTY unless
+ * exofs_empty_dir() accepts it, or the exofs_unlink() error.  On success the
+ * directory's size is zeroed and both it and the parent lose one more link.
+ */
+static int exofs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *victim = dentry->d_inode;
+	int rc;
+
+	if (!exofs_empty_dir(victim))
+		return -ENOTEMPTY;
+
+	rc = exofs_unlink(dir, dentry);
+	if (rc)
+		return rc;
+
+	victim->i_size = 0;
+	inode_dec_link_count(victim);
+	inode_dec_link_count(dir);
+
+	return 0;
+}
+
+/*
+ * Rename @old_dentry in @old_dir to @new_dentry in @new_dir.
+ *
+ * When the target name exists its entry is redirected to the old inode and
+ * the target inode loses a link (two if the source is a directory);
+ * otherwise a new entry is added.  When the source is a directory its ".."
+ * entry is pointed at @new_dir.
+ *
+ * Returns 0 on success or one of -ENOENT, -EIO, -ENOTEMPTY, -EMLINK or an
+ * error from exofs_set_link()/exofs_add_link().
+ */
+static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
+		struct inode *new_dir, struct dentry *new_dentry)
+{
+	struct inode *old_inode = old_dentry->d_inode;
+	struct inode *new_inode = new_dentry->d_inode;
+	struct page *dir_page = NULL;
+	struct exofs_dir_entry *dir_de = NULL;
+	struct page *old_page;
+	struct exofs_dir_entry *old_de;
+	int err = -ENOENT;
+
+	old_de = exofs_find_entry(old_dir, old_dentry, &old_page);
+	if (!old_de)
+		goto out;
+
+	/* dir_de is non-NULL from here on only when a directory is moved */
+	if (S_ISDIR(old_inode->i_mode)) {
+		err = -EIO;
+		dir_de = exofs_dotdot(old_inode, &dir_page);
+		if (!dir_de)
+			goto out_old;
+	}
+
+	if (new_inode) {
+		struct page *new_page;
+		struct exofs_dir_entry *new_de;
+
+		/* a directory may only replace an empty directory */
+		err = -ENOTEMPTY;
+		if (dir_de && !exofs_empty_dir(new_inode))
+			goto out_dir;
+
+		err = -ENOENT;
+		new_de = exofs_find_entry(new_dir, new_dentry, &new_page);
+		if (!new_de)
+			goto out_dir;
+		inode_inc_link_count(old_inode);
+		err = exofs_set_link(new_dir, new_de, new_page, old_inode);
+		/*
+		 * NOTE(review): the link counts of new_inode are dropped
+		 * before err is checked, and the extra link taken on
+		 * old_inode above is not given back on the error path -
+		 * confirm this is intended.
+		 */
+		new_inode->i_ctime = CURRENT_TIME;
+		if (dir_de)
+			drop_nlink(new_inode);
+		inode_dec_link_count(new_inode);
+		if (err)
+			goto out_dir;
+	} else {
+		if (dir_de) {
+			err = -EMLINK;
+			if (new_dir->i_nlink >= EXOFS_LINK_MAX)
+				goto out_dir;
+		}
+		inode_inc_link_count(old_inode);
+		err = exofs_add_link(new_dentry, old_inode);
+		if (err) {
+			inode_dec_link_count(old_inode);
+			goto out_dir;
+		}
+		if (dir_de)
+			inode_inc_link_count(new_dir);
+	}
+
+	old_inode->i_ctime = CURRENT_TIME;
+
+	/* the result of removing the old entry is not checked */
+	exofs_delete_entry(old_de, old_page);
+	inode_dec_link_count(old_inode);
+
+	if (dir_de) {
+		err = exofs_set_link(old_inode, dir_de, dir_page, new_dir);
+		inode_dec_link_count(old_dir);
+		/*
+		 * NOTE(review): out_dir unmaps and releases dir_page and
+		 * old_page; if exofs_set_link()/exofs_delete_entry() already
+		 * released them this would release them twice - verify
+		 * against those helpers.
+		 */
+		if (err)
+			goto out_dir;
+	}
+	return 0;
+
+
+out_dir:
+	if (dir_de) {
+		kunmap(dir_page);
+		page_cache_release(dir_page);
+	}
+out_old:
+	kunmap(old_page);
+	page_cache_release(old_page);
+out:
+	return err;
+}
+
+/*
+ * Inode operations for directories; exofs_iget() and exofs_mkdir() install
+ * this table on directory inodes.
+ */
+const struct inode_operations exofs_dir_inode_operations = {
+	.create 	= exofs_create,
+	.lookup 	= exofs_lookup,
+	.link   	= exofs_link,
+	.unlink 	= exofs_unlink,
+	.symlink	= exofs_symlink,
+	.mkdir  	= exofs_mkdir,
+	.rmdir  	= exofs_rmdir,
+	.mknod  	= exofs_mknod,
+	.rename 	= exofs_rename,
+	.setattr	= exofs_setattr,
+};
+
+/*
+ * Inode operations for special files; exofs_iget() installs this table on
+ * inodes that are neither regular files, directories nor symlinks.
+ */
+const struct inode_operations exofs_special_inode_operations = {
+	.setattr	= exofs_setattr,
+};
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c
new file mode 100644
index 0000000..b249ae9
--- /dev/null
+++ b/fs/exofs/osd.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2005, 2006
+ * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
+ * Copyright (C) 2005, 2006
+ * International Business Machines
+ * Copyright (C) 2008, 2009
+ * Boaz Harrosh <bharrosh@panasas.com>
+ *
+ * This file is part of exofs.
+ *
+ * exofs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.  Since it is based on ext2, and the only
+ * valid version of GPL for the Linux kernel is version 2, the only valid
+ * version of GPL for exofs is version 2.
+ *
+ * exofs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with exofs; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <scsi/scsi_device.h>
+#include <scsi/osd_sense.h>
+
+#include "exofs.h"
+
+/*
+ * Decode the sense information of a completed OSD request and translate it
+ * to a Linux error code.
+ *
+ * @or:        the completed request
+ * @in_resid:  if not NULL, receives data_len of the request's "in" side, or
+ *             0 when there is none
+ * @out_resid: if not NULL, receives data_len of the request's "out" side, or
+ *             0 when there is none
+ *
+ * Returns 0 when osd_req_decode_sense() reports no error, otherwise:
+ *   -EFAULT  invalid field in CDB at the starting-byte offset
+ *   -ENOENT  invalid field in CDB at the object-id offset
+ *   -EINVAL  any other invalid field in CDB
+ *   -ENOSPC  quota error
+ *   -EIO     everything else
+ */
+int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid)
+{
+	struct osd_sense_info osi;
+	int ret = osd_req_decode_sense(or, &osi);
+
+	if (ret) { /* translate to Linux codes */
+		if (osi.additional_code == scsi_invalid_field_in_cdb) {
+			/*
+			 * The three cases are mutually exclusive; without the
+			 * "else" here -EFAULT was always overwritten by
+			 * -EINVAL.
+			 */
+			if (osi.cdb_field_offset == OSD_CFO_STARTING_BYTE)
+				ret = -EFAULT;
+			else if (osi.cdb_field_offset == OSD_CFO_OBJECT_ID)
+				ret = -ENOENT;
+			else
+				ret = -EINVAL;
+		} else if (osi.additional_code == osd_quota_error)
+			ret = -ENOSPC;
+		else
+			ret = -EIO;
+	}
+
+	/* FIXME: should be included in osd_sense_info */
+	if (in_resid)
+		*in_resid = or->in.req ? or->in.req->data_len : 0;
+
+	if (out_resid)
+		*out_resid = or->out.req ? or->out.req->data_len : 0;
+
+	return ret;
+}
+
+/*
+ * Fill @cred_a with a credential for object @obj by calling
+ * osd_sec_init_nosec_doall_caps() with the flags false and true.
+ */
+void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
+{
+	osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
+}
+
+/*
+ * Perform a synchronous OSD operation.
+ *
+ * @or:         prepared request; its timeout field is set to @timeout
+ * @timeout:    value stored in or->timeout
+ * @credential: credential passed to osd_finalize_request()
+ *
+ * Returns the osd_finalize_request() error if finalizing fails, otherwise
+ * the result of osd_execute_request().  The request is not released here.
+ */
+int exofs_sync_op(struct osd_request *or, int timeout, uint8_t *credential)
+{
+	int ret;
+
+	or->timeout = timeout;
+	ret = osd_finalize_request(or, 0, credential, NULL);
+	if (ret) {
+		EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret);
+		return ret;
+	}
+
+	ret = osd_execute_request(or);
+
+	if (ret)
+		EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
+	/* osd_req_decode_sense(or, ret); */
+	return ret;
+}
+
+/*
+ * Submit an OSD request for asynchronous execution.
+ *
+ * The request is finalized with @cred and handed to
+ * osd_execute_request_async() together with @async_done and
+ * @caller_context.  Returns the first non-zero result of those two steps,
+ * or 0.  The request is not released here.
+ */
+int exofs_async_op(struct osd_request *or, osd_req_done_fn *async_done,
+		   void *caller_context, u8 *cred)
+{
+	int err = osd_finalize_request(or, 0, cred, NULL);
+
+	if (err) {
+		EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", err);
+		return err;
+	}
+
+	err = osd_execute_request_async(or, async_done, caller_context);
+	if (err)
+		EXOFS_DBGMSG("osd_execute_request_async() => %d\n", err);
+
+	return err;
+}
+
+/*
+ * Search the attributes returned with request @or for the one whose page and
+ * id match @attr.  On a match the length and value pointer are stored in
+ * @attr and 0 is returned; -EIO is returned when the list ends without a
+ * match.
+ */
+int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
+{
+	struct osd_attr found = {.attr_page = 0}; /* start with zeros */
+	void *pos = NULL;
+	int count;
+
+	for (;;) {
+		/* decode one attribute per iteration */
+		count = 1;
+		osd_req_decode_get_attr_list(or, &found, &count, &pos);
+
+		if ((found.attr_page == attr->attr_page) &&
+		    (found.attr_id == attr->attr_id)) {
+			attr->len = found.len;
+			attr->val_ptr = found.val_ptr;
+			return 0;
+		}
+
+		if (!pos)
+			break;
+	}
+
+	return -EIO;
+}
+
+/*
+ * Set up @or to read @len bytes at @offset of object @obj into the kernel
+ * buffer @buff.
+ *
+ * The buffer is mapped into a bio with bio_map_kern() on the request queue
+ * of the OSD's SCSI device.  bio_map_kern() reports failure with an
+ * ERR_PTR() value rather than NULL, so the result is checked with IS_ERR()
+ * and the encoded error is returned.
+ *
+ * Returns 0 on success or the negative error from bio_map_kern().
+ */
+int osd_req_read_kern(struct osd_request *or,
+	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len)
+{
+	struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
+	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
+
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	osd_req_read(or, obj, bio, offset);
+	return 0;
+}
+
+/*
+ * Set up @or to write @len bytes from the kernel buffer @buff to @offset of
+ * object @obj.
+ *
+ * The buffer is mapped into a bio with bio_map_kern() on the request queue
+ * of the OSD's SCSI device.  bio_map_kern() reports failure with an
+ * ERR_PTR() value rather than NULL, so the result is checked with IS_ERR()
+ * and the encoded error is returned.
+ *
+ * Returns 0 on success or the negative error from bio_map_kern().
+ */
+int osd_req_write_kern(struct osd_request *or,
+	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len)
+{
+	struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
+	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
+
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	osd_req_write(or, obj, bio, offset);
+	return 0;
+}
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
new file mode 100644
index 0000000..9f1985e
--- /dev/null
+++ b/fs/exofs/super.c
@@ -0,0 +1,584 @@
+/*
+ * Copyright (C) 2005, 2006
+ * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
+ * Copyright (C) 2005, 2006
+ * International Business Machines
+ * Copyright (C) 2008, 2009
+ * Boaz Harrosh <bharrosh@panasas.com>
+ *
+ * Copyrights for code taken from ext2:
+ *     Copyright (C) 1992, 1993, 1994, 1995
+ *     Remy Card (card@masi.ibp.fr)
+ *     Laboratoire MASI - Institut Blaise Pascal
+ *     Universite Pierre et Marie Curie (Paris VI)
+ *     from
+ *     linux/fs/minix/inode.c
+ *     Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ * This file is part of exofs.
+ *
+ * exofs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.  Since it is based on ext2, and the only
+ * valid version of GPL for the Linux kernel is version 2, the only valid
+ * version of GPL for exofs is version 2.
+ *
+ * exofs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with exofs; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/string.h>
+#include <linux/parser.h>
+#include <linux/vfs.h>
+#include <linux/random.h>
+#include <linux/exportfs.h>
+
+#include "exofs.h"
+
+/******************************************************************************
+ * MOUNT OPTIONS
+ *****************************************************************************/
+
+/*
+ * Mount parameters handed to exofs_fill_super(); pid and timeout are filled
+ * in by parse_options().
+ */
+struct exofs_mountopt {
+	const char *dev_name;	/* path looked up with osduld_path_lookup() */
+	uint64_t pid;		/* partition id from the "pid=" option */
+	int timeout;		/* request timeout; "to=" seconds times HZ */
+};
+
+/*
+ * exofs-specific mount-time options.  Only Opt_pid and Opt_to appear in the
+ * tokens table below.
+ */
+enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err };
+
+/*
+ * Our mount-time options.  These should ideally be 64-bit unsigned, but the
+ * kernel's parsing functions do not currently support that.  32-bit should be
+ * sufficient for most applications now.
+ *
+ * The table is terminated by the Opt_err entry with a NULL pattern.
+ */
+static match_table_t tokens = {
+	{Opt_pid, "pid=%u"},
+	{Opt_to, "to=%u"},
+	{Opt_err, NULL}
+};
+
+/*
+ * The main option parsing method.  Also makes sure that all of the mandatory
+ * mount options were set.
+ *
+ * @options: comma separated option string; it is consumed by strsep()
+ * @opts:    zeroed and then filled in; timeout defaults to
+ *           BLK_DEFAULT_SG_TIMEOUT
+ *
+ * Returns 0 on success and -EINVAL when an option value is invalid or the
+ * mandatory "pid=" option is missing.
+ */
+static int parse_options(char *options, struct exofs_mountopt *opts)
+{
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	int option;
+	bool s_pid = false;
+
+	EXOFS_DBGMSG("parse_options %s\n", options);
+	/* defaults */
+	memset(opts, 0, sizeof(*opts));
+	opts->timeout = BLK_DEFAULT_SG_TIMEOUT;
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token;
+		char str[32];
+
+		/* skip empty fields such as the one produced by ",," */
+		if (!*p)
+			continue;
+
+		/*
+		 * The switch has no default case, so tokens other than
+		 * Opt_pid and Opt_to are silently skipped.
+		 */
+		token = match_token(p, tokens, args);
+		switch (token) {
+		case Opt_pid:
+			/* copied to a string so it can be parsed as 64 bit */
+			if (0 == match_strlcpy(str, &args[0], sizeof(str)))
+				return -EINVAL;
+			opts->pid = simple_strtoull(str, NULL, 0);
+			if (opts->pid < EXOFS_MIN_PID) {
+				EXOFS_ERR("Partition ID must be >= %u",
+					  EXOFS_MIN_PID);
+				return -EINVAL;
+			}
+			s_pid = 1;
+			break;
+		case Opt_to:
+			if (match_int(&args[0], &option))
+				return -EINVAL;
+			if (option <= 0) {
+				EXOFS_ERR("Timout must be > 0");
+				return -EINVAL;
+			}
+			/* the option is multiplied by HZ before it is stored */
+			opts->timeout = option * HZ;
+			break;
+		}
+	}
+
+	/* "pid=" is mandatory */
+	if (!s_pid) {
+		EXOFS_ERR("Need to specify the following options:\n");
+		EXOFS_ERR("    -o pid=pid_no_to_use\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/******************************************************************************
+ * INODE CACHE
+ *****************************************************************************/
+
+/*
+ * Slab cache for struct exofs_i_info objects; created by init_inodecache()
+ * and destroyed by destroy_inodecache().
+ */
+static struct kmem_cache *exofs_inode_cachep;
+
+/*
+ * Allocate an exofs_i_info from the inode cache and return its embedded VFS
+ * inode with i_version set to 1, or NULL when the allocation fails.
+ */
+static struct inode *exofs_alloc_inode(struct super_block *sb)
+{
+	struct exofs_i_info *info = kmem_cache_alloc(exofs_inode_cachep,
+						     GFP_KERNEL);
+
+	if (info == NULL)
+		return NULL;
+
+	info->vfs_inode.i_version = 1;
+
+	return &info->vfs_inode;
+}
+
+/*
+ * Return the exofs_i_info that contains @inode to the inode cache.
+ */
+static void exofs_destroy_inode(struct inode *inode)
+{
+	kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
+}
+
+/*
+ * Constructor registered with the inode cache by init_inodecache(): @foo is
+ * an exofs_i_info whose embedded VFS inode gets its one-time initialisation.
+ */
+static void exofs_init_once(void *foo)
+{
+	struct exofs_i_info *oi = foo;
+
+	inode_init_once(&oi->vfs_inode);
+}
+
+/*
+ * Create the slab cache for exofs_i_info objects, with exofs_init_once() as
+ * constructor.  Returns 0 on success or -ENOMEM when the cache cannot be
+ * created.
+ */
+static int init_inodecache(void)
+{
+	exofs_inode_cachep = kmem_cache_create("exofs_inode_cache",
+				sizeof(struct exofs_i_info), 0,
+				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+				exofs_init_once);
+
+	return exofs_inode_cachep ? 0 : -ENOMEM;
+}
+
+/*
+ * Destroy the inode cache created by init_inodecache().
+ */
+static void destroy_inodecache(void)
+{
+	kmem_cache_destroy(exofs_inode_cachep);
+}
+
+/******************************************************************************
+ * SUPERBLOCK FUNCTIONS
+ *****************************************************************************/
+static const struct super_operations exofs_sops;
+static const struct export_operations exofs_export_ops;
+
+/*
+ * Write the superblock to the OSD.
+ *
+ * An exofs_fscb is built from the in-memory super block info and written
+ * synchronously to object EXOFS_SUPER_ID of the partition.  sb->s_dirt is
+ * cleared only when the write succeeds; failures are logged and the super
+ * block stays dirty.
+ */
+static void exofs_write_super(struct super_block *sb)
+{
+	struct exofs_sb_info *sbi;
+	struct exofs_fscb *fscb;
+	struct osd_request *or;
+	struct osd_obj_id obj;
+	int ret;
+
+	fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL);
+	if (!fscb) {
+		EXOFS_ERR("exofs_write_super: memory allocation failed.\n");
+		return;
+	}
+
+	/* the big kernel lock is held until the "out" label */
+	lock_kernel();
+	sbi = sb->s_fs_info;
+	fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
+	fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
+	fscb->s_magic = cpu_to_le16(sb->s_magic);
+	fscb->s_newfs = 0;
+
+	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
+	if (unlikely(!or)) {
+		EXOFS_ERR("exofs_write_super: osd_start_request failed.\n");
+		goto out;
+	}
+
+	obj.partition = sbi->s_pid;
+	obj.id = EXOFS_SUPER_ID;
+	ret = osd_req_write_kern(or, &obj, 0, fscb, sizeof(*fscb));
+	if (unlikely(ret)) {
+		EXOFS_ERR("exofs_write_super: osd_req_write_kern failed.\n");
+		goto out;
+	}
+
+	ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred);
+	if (unlikely(ret)) {
+		EXOFS_ERR("exofs_write_super: exofs_sync_op failed.\n");
+		goto out;
+	}
+	sb->s_dirt = 0;
+
+out:
+	/* or is NULL here when osd_start_request() failed */
+	if (or)
+		osd_end_request(or);
+	unlock_kernel();
+	kfree(fscb);
+}
+
+/*
+ * This function is called when the vfs is freeing the superblock.  We just
+ * need to free our own part: wait for outstanding commands, release the OSD
+ * device and free the exofs_sb_info.
+ */
+static void exofs_put_super(struct super_block *sb)
+{
+	int num_pend;
+	struct exofs_sb_info *sbi = sb->s_fs_info;
+
+	/*
+	 * make sure there are no pending commands
+	 *
+	 * The wait queue is local to this loop and nothing visible here
+	 * wakes it, so each pass lasts at most the 100 msec timeout before
+	 * s_curr_pending is read again.
+	 */
+	for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0;
+	     num_pend = atomic_read(&sbi->s_curr_pending)) {
+		wait_queue_head_t wq;
+		init_waitqueue_head(&wq);
+		wait_event_timeout(wq,
+				  (atomic_read(&sbi->s_curr_pending) == 0),
+				  msecs_to_jiffies(100));
+	}
+
+	osduld_put_device(sbi->s_dev);
+	kfree(sb->s_fs_info);
+	sb->s_fs_info = NULL;
+}
+
+/*
+ * Read the superblock from the OSD and fill in the fields
+ *
+ * @sb:     super block to set up
+ * @data:   struct exofs_mountopt with the device name, partition id and
+ *          timeout
+ * @silent: when non-zero, some of the error messages are suppressed
+ *
+ * Returns 0 on success or a negative error code.  On failure the OSD device
+ * is put and the exofs_sb_info is freed.
+ */
+static int exofs_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct inode *root;
+	struct exofs_mountopt *opts = data;
+	struct exofs_sb_info *sbi;	/*extended info                  */
+	struct exofs_fscb fscb;		/*on-disk superblock info        */
+	struct osd_request *or = NULL;
+	struct osd_obj_id obj;
+	int ret;
+
+	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+	if (!sbi)
+		return -ENOMEM;
+	sb->s_fs_info = sbi;
+
+	/* use mount options to fill superblock */
+	sbi->s_dev = osduld_path_lookup(opts->dev_name);
+	if (IS_ERR(sbi->s_dev)) {
+		ret = PTR_ERR(sbi->s_dev);
+		/* cleared so that free_sbi does not put an error pointer */
+		sbi->s_dev = NULL;
+		goto free_sbi;
+	}
+
+	sbi->s_pid = opts->pid;
+	sbi->s_timeout = opts->timeout;
+
+	/* fill in some other data by hand */
+	memset(sb->s_id, 0, sizeof(sb->s_id));
+	strcpy(sb->s_id, "exofs");
+	sb->s_blocksize = EXOFS_BLKSIZE;
+	sb->s_blocksize_bits = EXOFS_BLKSHIFT;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	atomic_set(&sbi->s_curr_pending, 0);
+	sb->s_bdev = NULL;
+	sb->s_dev = 0;
+
+	/* read data from on-disk superblock object */
+	obj.partition = sbi->s_pid;
+	obj.id = EXOFS_SUPER_ID;
+	exofs_make_credential(sbi->s_cred, &obj);
+
+	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
+	if (unlikely(!or)) {
+		if (!silent)
+			EXOFS_ERR(
+			       "exofs_fill_super: osd_start_request failed.\n");
+		ret = -ENOMEM;
+		goto free_sbi;
+	}
+	/* the read target is the on-stack fscb */
+	ret = osd_req_read_kern(or, &obj, 0, &fscb, sizeof(fscb));
+	if (unlikely(ret)) {
+		if (!silent)
+			EXOFS_ERR(
+			       "exofs_fill_super: osd_req_read_kern failed.\n");
+		/* whatever osd_req_read_kern() returned becomes -ENOMEM */
+		ret = -ENOMEM;
+		goto free_sbi;
+	}
+
+	ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred);
+	if (unlikely(ret)) {
+		if (!silent)
+			EXOFS_ERR("exofs_fill_super: exofs_sync_op failed.\n");
+		ret = -EIO;
+		goto free_sbi;
+	}
+
+	sb->s_magic = le16_to_cpu(fscb.s_magic);
+	sbi->s_nextid = le64_to_cpu(fscb.s_nextid);
+	sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles);
+
+	/* make sure what we read from the object store is correct */
+	if (sb->s_magic != EXOFS_SUPER_MAGIC) {
+		if (!silent)
+			EXOFS_ERR("ERROR: Bad magic value\n");
+		ret = -EINVAL;
+		goto free_sbi;
+	}
+
+	/* start generation numbers from a random point */
+	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
+	spin_lock_init(&sbi->s_next_gen_lock);
+
+	/* set up operation vectors */
+	sb->s_op = &exofs_sops;
+	sb->s_export_op = &exofs_export_ops;
+	/* exofs_iget() adds EXOFS_OBJ_OFF back to form the object id */
+	root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF);
+	if (IS_ERR(root)) {
+		EXOFS_ERR("ERROR: exofs_iget failed\n");
+		ret = PTR_ERR(root);
+		goto free_sbi;
+	}
+	sb->s_root = d_alloc_root(root);
+	if (!sb->s_root) {
+		iput(root);
+		EXOFS_ERR("ERROR: get root inode failed\n");
+		ret = -ENOMEM;
+		goto free_sbi;
+	}
+
+	/* the root object must be a directory */
+	if (!S_ISDIR(root->i_mode)) {
+		dput(sb->s_root);
+		sb->s_root = NULL;
+		EXOFS_ERR("ERROR: corrupt root inode (mode = %hd)\n",
+		       root->i_mode);
+		ret = -EINVAL;
+		goto free_sbi;
+	}
+
+	ret = 0;
+out:
+	/* shared exit: the read request is released on every path */
+	if (or)
+		osd_end_request(or);
+	return ret;
+
+free_sbi:
+	/*
+	 * NOTE(review): sb->s_fs_info still points at sbi after the kfree()
+	 * below - confirm nothing dereferences it after a failed mount.
+	 */
+	osduld_put_device(sbi->s_dev); /* NULL safe */
+	kfree(sbi);
+	goto out;
+}
+
+/*
+ * Parse the mount options and pass them to exofs_fill_super via get_sb_nodev
+ */
+static int exofs_get_sb(struct file_system_type *type,
+			  int flags, const char *dev_name,
+			  void *data, struct vfsmount *mnt)
+{
+	struct exofs_mountopt opts;
+	int ret;
+
+	ret = parse_options(data, &opts);
+	if (ret)
+		return ret;
+
+	opts.dev_name = dev_name;	/* read back in exofs_fill_super */
+	return get_sb_nodev(type, flags, &opts, exofs_fill_super, mnt);
+}
+
+/*
+ * statfs (used by 'df', for example): fetch the partition's capacity quota
+ * and used capacity from the OSD and report them in EXOFS_BLKSIZE blocks.
+ */
+static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct exofs_sb_info *sbi = sb->s_fs_info;
+	struct osd_obj_id obj = {sbi->s_pid, 0};
+	struct osd_attr attrs[] = {
+		ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS,
+			OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)),
+		ATTR_DEF(OSD_APAGE_PARTITION_INFORMATION,
+			OSD_ATTR_PI_USED_CAPACITY, sizeof(__be64)),
+	};
+	uint64_t capacity = ULLONG_MAX;	/* kept if the extract below fails */
+	uint64_t used = ULLONG_MAX;	/* kept if the extract below fails */
+	struct osd_request *or;
+	uint8_t cred_a[OSD_CAP_LEN];
+	int ret;
+
+	/* get used/capacity attributes */
+	exofs_make_credential(cred_a, &obj);
+
+	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
+	if (unlikely(!or)) {
+		EXOFS_DBGMSG("exofs_statfs: osd_start_request failed.\n");
+		return -ENOMEM;
+	}
+
+	osd_req_get_attributes(or, &obj);
+	osd_req_add_get_attr_list(or, attrs, ARRAY_SIZE(attrs));
+	ret = exofs_sync_op(or, sbi->s_timeout, cred_a);
+	if (unlikely(ret))
+		goto out;	/* buf is left untouched on this path */
+
+	ret = extract_attr_from_req(or, &attrs[0]);
+	if (likely(!ret))
+		capacity = get_unaligned_be64(attrs[0].val_ptr);
+	else
+		EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n");
+
+	ret = extract_attr_from_req(or, &attrs[1]); /* overwrites ret above */
+	if (likely(!ret))
+		used = get_unaligned_be64(attrs[1].val_ptr);
+	else
+		EXOFS_DBGMSG("exofs_statfs: get used-space failed.\n");
+
+	/* fill in the stats buffer (also done when an extract failed) */
+	buf->f_type = EXOFS_SUPER_MAGIC;
+	buf->f_bsize = EXOFS_BLKSIZE;
+	buf->f_blocks = (capacity >> EXOFS_BLKSHIFT);
+	buf->f_bfree = ((capacity - used) >> EXOFS_BLKSHIFT);
+	buf->f_bavail = buf->f_bfree;
+	buf->f_files = sbi->s_numfiles;
+	buf->f_ffree = EXOFS_MAX_ID - sbi->s_numfiles;
+	buf->f_namelen = EXOFS_NAME_LEN;
+
+out:
+	osd_end_request(or);
+	return ret;	/* result of the last step that ran */
+}
+
+static const struct super_operations exofs_sops = {
+	.alloc_inode    = exofs_alloc_inode,
+	.destroy_inode  = exofs_destroy_inode,
+	.write_inode    = exofs_write_inode,
+	.delete_inode   = exofs_delete_inode,
+	.put_super      = exofs_put_super,	/* waits for pending cmds */
+	.write_super    = exofs_write_super,
+	.statfs         = exofs_statfs,		/* sizes queried from OSD */
+};
+
+/******************************************************************************
+ * EXPORT OPERATIONS
+ *****************************************************************************/
+
+struct dentry *exofs_get_parent(struct dentry *child)
+{
+	unsigned long ino = exofs_parent_ino(child);
+
+	if (!ino)	/* no parent ino: ->get_parent must not return NULL */
+		return ERR_PTR(-ESTALE);
+
+	return d_obtain_alias(exofs_iget(child->d_inode->i_sb, ino));
+}
+
+static struct inode *exofs_nfs_get_inode(struct super_block *sb,
+		u64 ino, u32 generation)
+{
+	struct inode *inode;
+
+	inode = exofs_iget(sb, ino);
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);	/* pass exofs_iget's error through */
+	if (generation && inode->i_generation != generation) {
+		/* nonzero generation that doesn't match: handle is stale */
+		iput(inode);
+		return ERR_PTR(-ESTALE);
+	}
+	return inode;
+}
+
+static struct dentry *exofs_fh_to_dentry(struct super_block *sb,
+				struct fid *fid, int fh_len, int fh_type)
+{
+	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
+				    exofs_nfs_get_inode); /* inode getter */
+}
+
+static struct dentry *exofs_fh_to_parent(struct super_block *sb,
+				struct fid *fid, int fh_len, int fh_type)
+{
+	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
+				    exofs_nfs_get_inode); /* inode getter */
+}
+
+static const struct export_operations exofs_export_ops = {
+	.fh_to_dentry = exofs_fh_to_dentry,
+	.fh_to_parent = exofs_fh_to_parent,
+	.get_parent = exofs_get_parent,	/* looks up exofs_parent_ino() */
+};
+
+/******************************************************************************
+ * INSMOD/RMMOD
+ *****************************************************************************/
+
+/*
+ * Filesystem type registered by init_exofs; mounts go through exofs_get_sb
+ */
+static struct file_system_type exofs_type = {
+	.owner          = THIS_MODULE,
+	.name           = "exofs",
+	.get_sb         = exofs_get_sb,
+	.kill_sb        = generic_shutdown_super,
+};
+
+static int __init init_exofs(void)
+{
+	int err;
+
+	err = init_inodecache();
+	if (err)
+		goto out;	/* nothing to undo yet */
+
+	err = register_filesystem(&exofs_type);
+	if (err)
+		goto out_d;
+
+	return 0;
+out_d:
+	destroy_inodecache();	/* undo init_inodecache() */
+out:
+	return err;
+}
+
+static void __exit exit_exofs(void)
+{
+	unregister_filesystem(&exofs_type);	/* reverse of init_exofs */
+	destroy_inodecache();
+}
+
+MODULE_AUTHOR("Avishay Traeger <avishay@gmail.com>");
+MODULE_DESCRIPTION("exofs");
+MODULE_LICENSE("GPL");
+
+module_init(init_exofs)
+module_exit(exit_exofs)
diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c
new file mode 100644
index 0000000..36e2d7b
--- /dev/null
+++ b/fs/exofs/symlink.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2005, 2006
+ * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
+ * Copyright (C) 2005, 2006
+ * International Business Machines
+ * Copyright (C) 2008, 2009
+ * Boaz Harrosh <bharrosh@panasas.com>
+ *
+ * Copyrights for code taken from ext2:
+ *     Copyright (C) 1992, 1993, 1994, 1995
+ *     Remy Card (card@masi.ibp.fr)
+ *     Laboratoire MASI - Institut Blaise Pascal
+ *     Universite Pierre et Marie Curie (Paris VI)
+ *     from
+ *     linux/fs/minix/inode.c
+ *     Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ * This file is part of exofs.
+ *
+ * exofs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.  Since it is based on ext2, and the only
+ * valid version of GPL for the Linux kernel is version 2, the only valid
+ * version of GPL for exofs is version 2.
+ *
+ * exofs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with exofs; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/namei.h>
+
+#include "exofs.h"
+
+static void *exofs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	struct exofs_i_info *oi = exofs_i(dentry->d_inode);
+
+	nd_set_link(nd, (char *)oi->i_data);	/* link text is in i_data */
+	return NULL;	/* the fast-symlink ops below set no .put_link */
+}
+
+const struct inode_operations exofs_symlink_inode_operations = {
+	.readlink	= generic_readlink,
+	.follow_link	= page_follow_link_light,
+	.put_link	= page_put_link, /* presumably pairs with follow_link */
+};
+
+const struct inode_operations exofs_fast_symlink_inode_operations = {
+	.readlink	= generic_readlink,
+	.follow_link	= exofs_follow_link,	/* target read from oi->i_data */
+};
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 2bb788a..e48e9a3 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -87,12 +87,12 @@ static int read_block_bitmap(struct super_block *sb,
 {
 	struct buffer_head *bh = NULL;
 	int retval = 0;
-	kernel_lb_addr loc;
+	struct kernel_lb_addr loc;
 
 	loc.logicalBlockNum = bitmap->s_extPosition;
 	loc.partitionReferenceNum = UDF_SB(sb)->s_partition;
 
-	bh = udf_tread(sb, udf_get_lb_pblock(sb, loc, block));
+	bh = udf_tread(sb, udf_get_lb_pblock(sb, &loc, block));
 	if (!bh)
 		retval = -EIO;
 
@@ -140,27 +140,29 @@ static inline int load_block_bitmap(struct super_block *sb,
 	return slot;
 }
 
-static bool udf_add_free_space(struct udf_sb_info *sbi,
-				u16 partition, u32 cnt)
+static void udf_add_free_space(struct super_block *sb, u16 partition, u32 cnt)
 {
+	struct udf_sb_info *sbi = UDF_SB(sb);
 	struct logicalVolIntegrityDesc *lvid;
 
-	if (sbi->s_lvid_bh == NULL)
-		return false;
+	if (!sbi->s_lvid_bh)
+		return;
 
 	lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data;
 	le32_add_cpu(&lvid->freeSpaceTable[partition], cnt);
-	return true;
+	udf_updated_lvid(sb);
 }
 
 static void udf_bitmap_free_blocks(struct super_block *sb,
 				   struct inode *inode,
 				   struct udf_bitmap *bitmap,
-				   kernel_lb_addr bloc, uint32_t offset,
+				   struct kernel_lb_addr *bloc,
+				   uint32_t offset,
 				   uint32_t count)
 {
 	struct udf_sb_info *sbi = UDF_SB(sb);
 	struct buffer_head *bh = NULL;
+	struct udf_part_map *partmap;
 	unsigned long block;
 	unsigned long block_group;
 	unsigned long bit;
@@ -169,17 +171,17 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
 	unsigned long overflow;
 
 	mutex_lock(&sbi->s_alloc_mutex);
-	if (bloc.logicalBlockNum < 0 ||
-	    (bloc.logicalBlockNum + count) >
-		sbi->s_partmaps[bloc.partitionReferenceNum].s_partition_len) {
+	partmap = &sbi->s_partmaps[bloc->partitionReferenceNum];
+	if (bloc->logicalBlockNum < 0 ||
+	    (bloc->logicalBlockNum + count) >
+		partmap->s_partition_len) {
 		udf_debug("%d < %d || %d + %d > %d\n",
-			  bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count,
-			  sbi->s_partmaps[bloc.partitionReferenceNum].
-							s_partition_len);
+			  bloc->logicalBlockNum, 0, bloc->logicalBlockNum,
+			  count, partmap->s_partition_len);
 		goto error_return;
 	}
 
-	block = bloc.logicalBlockNum + offset +
+	block = bloc->logicalBlockNum + offset +
 		(sizeof(struct spaceBitmapDesc) << 3);
 
 	do {
@@ -207,7 +209,7 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
 			} else {
 				if (inode)
 					vfs_dq_free_block(inode, 1);
-				udf_add_free_space(sbi, sbi->s_partition, 1);
+				udf_add_free_space(sb, sbi->s_partition, 1);
 			}
 		}
 		mark_buffer_dirty(bh);
@@ -218,9 +220,6 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
 	} while (overflow);
 
 error_return:
-	sb->s_dirt = 1;
-	if (sbi->s_lvid_bh)
-		mark_buffer_dirty(sbi->s_lvid_bh);
 	mutex_unlock(&sbi->s_alloc_mutex);
 }
 
@@ -277,9 +276,7 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb,
 	} while (block_count > 0);
 
 out:
-	if (udf_add_free_space(sbi, partition, -alloc_count))
-		mark_buffer_dirty(sbi->s_lvid_bh);
-	sb->s_dirt = 1;
+	udf_add_free_space(sb, partition, -alloc_count);
 	mutex_unlock(&sbi->s_alloc_mutex);
 	return alloc_count;
 }
@@ -409,9 +406,7 @@ got_block:
 
 	mark_buffer_dirty(bh);
 
-	if (udf_add_free_space(sbi, partition, -1))
-		mark_buffer_dirty(sbi->s_lvid_bh);
-	sb->s_dirt = 1;
+	udf_add_free_space(sb, partition, -1);
 	mutex_unlock(&sbi->s_alloc_mutex);
 	*err = 0;
 	return newblock;
@@ -425,26 +420,28 @@ error_return:
 static void udf_table_free_blocks(struct super_block *sb,
 				  struct inode *inode,
 				  struct inode *table,
-				  kernel_lb_addr bloc, uint32_t offset,
+				  struct kernel_lb_addr *bloc,
+				  uint32_t offset,
 				  uint32_t count)
 {
 	struct udf_sb_info *sbi = UDF_SB(sb);
+	struct udf_part_map *partmap;
 	uint32_t start, end;
 	uint32_t elen;
-	kernel_lb_addr eloc;
+	struct kernel_lb_addr eloc;
 	struct extent_position oepos, epos;
 	int8_t etype;
 	int i;
 	struct udf_inode_info *iinfo;
 
 	mutex_lock(&sbi->s_alloc_mutex);
-	if (bloc.logicalBlockNum < 0 ||
-	    (bloc.logicalBlockNum + count) >
-		sbi->s_partmaps[bloc.partitionReferenceNum].s_partition_len) {
+	partmap = &sbi->s_partmaps[bloc->partitionReferenceNum];
+	if (bloc->logicalBlockNum < 0 ||
+	    (bloc->logicalBlockNum + count) >
+		partmap->s_partition_len) {
 		udf_debug("%d < %d || %d + %d > %d\n",
-			  bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count,
-			  sbi->s_partmaps[bloc.partitionReferenceNum].
-							s_partition_len);
+			  bloc->logicalBlockNum, 0, bloc->logicalBlockNum,
+			  count, partmap->s_partition_len);
 		goto error_return;
 	}
 
@@ -453,11 +450,10 @@ static void udf_table_free_blocks(struct super_block *sb,
 	   could occure, but.. oh well */
 	if (inode)
 		vfs_dq_free_block(inode, count);
-	if (udf_add_free_space(sbi, sbi->s_partition, count))
-		mark_buffer_dirty(sbi->s_lvid_bh);
+	udf_add_free_space(sb, sbi->s_partition, count);
 
-	start = bloc.logicalBlockNum + offset;
-	end = bloc.logicalBlockNum + offset + count - 1;
+	start = bloc->logicalBlockNum + offset;
+	end = bloc->logicalBlockNum + offset + count - 1;
 
 	epos.offset = oepos.offset = sizeof(struct unallocSpaceEntry);
 	elen = 0;
@@ -483,7 +479,7 @@ static void udf_table_free_blocks(struct super_block *sb,
 				start += count;
 				count = 0;
 			}
-			udf_write_aext(table, &oepos, eloc, elen, 1);
+			udf_write_aext(table, &oepos, &eloc, elen, 1);
 		} else if (eloc.logicalBlockNum == (end + 1)) {
 			if ((0x3FFFFFFF - elen) <
 					(count << sb->s_blocksize_bits)) {
@@ -502,7 +498,7 @@ static void udf_table_free_blocks(struct super_block *sb,
 				end -= count;
 				count = 0;
 			}
-			udf_write_aext(table, &oepos, eloc, elen, 1);
+			udf_write_aext(table, &oepos, &eloc, elen, 1);
 		}
 
 		if (epos.bh != oepos.bh) {
@@ -532,8 +528,8 @@ static void udf_table_free_blocks(struct super_block *sb,
 		 */
 
 		int adsize;
-		short_ad *sad = NULL;
-		long_ad *lad = NULL;
+		struct short_ad *sad = NULL;
+		struct long_ad *lad = NULL;
 		struct allocExtDesc *aed;
 
 		eloc.logicalBlockNum = start;
@@ -541,9 +537,9 @@ static void udf_table_free_blocks(struct super_block *sb,
 			(count << sb->s_blocksize_bits);
 
 		if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
-			adsize = sizeof(short_ad);
+			adsize = sizeof(struct short_ad);
 		else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
-			adsize = sizeof(long_ad);
+			adsize = sizeof(struct long_ad);
 		else {
 			brelse(oepos.bh);
 			brelse(epos.bh);
@@ -563,7 +559,7 @@ static void udf_table_free_blocks(struct super_block *sb,
 			elen -= sb->s_blocksize;
 
 			epos.bh = udf_tread(sb,
-					udf_get_lb_pblock(sb, epos.block, 0));
+					udf_get_lb_pblock(sb, &epos.block, 0));
 			if (!epos.bh) {
 				brelse(oepos.bh);
 				goto error_return;
@@ -601,15 +597,15 @@ static void udf_table_free_blocks(struct super_block *sb,
 			if (sbi->s_udfrev >= 0x0200)
 				udf_new_tag(epos.bh->b_data, TAG_IDENT_AED,
 					    3, 1, epos.block.logicalBlockNum,
-					    sizeof(tag));
+					    sizeof(struct tag));
 			else
 				udf_new_tag(epos.bh->b_data, TAG_IDENT_AED,
 					    2, 1, epos.block.logicalBlockNum,
-					    sizeof(tag));
+					    sizeof(struct tag));
 
 			switch (iinfo->i_alloc_type) {
 			case ICBTAG_FLAG_AD_SHORT:
-				sad = (short_ad *)sptr;
+				sad = (struct short_ad *)sptr;
 				sad->extLength = cpu_to_le32(
 					EXT_NEXT_EXTENT_ALLOCDECS |
 					sb->s_blocksize);
@@ -617,7 +613,7 @@ static void udf_table_free_blocks(struct super_block *sb,
 					cpu_to_le32(epos.block.logicalBlockNum);
 				break;
 			case ICBTAG_FLAG_AD_LONG:
-				lad = (long_ad *)sptr;
+				lad = (struct long_ad *)sptr;
 				lad->extLength = cpu_to_le32(
 					EXT_NEXT_EXTENT_ALLOCDECS |
 					sb->s_blocksize);
@@ -635,7 +631,7 @@ static void udf_table_free_blocks(struct super_block *sb,
 
 		/* It's possible that stealing the block emptied the extent */
 		if (elen) {
-			udf_write_aext(table, &epos, eloc, elen, 1);
+			udf_write_aext(table, &epos, &eloc, elen, 1);
 
 			if (!epos.bh) {
 				iinfo->i_lenAlloc += adsize;
@@ -653,7 +649,6 @@ static void udf_table_free_blocks(struct super_block *sb,
 	brelse(oepos.bh);
 
 error_return:
-	sb->s_dirt = 1;
 	mutex_unlock(&sbi->s_alloc_mutex);
 	return;
 }
@@ -666,7 +661,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
 	struct udf_sb_info *sbi = UDF_SB(sb);
 	int alloc_count = 0;
 	uint32_t elen, adsize;
-	kernel_lb_addr eloc;
+	struct kernel_lb_addr eloc;
 	struct extent_position epos;
 	int8_t etype = -1;
 	struct udf_inode_info *iinfo;
@@ -677,9 +672,9 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
 
 	iinfo = UDF_I(table);
 	if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
-		adsize = sizeof(short_ad);
+		adsize = sizeof(struct short_ad);
 	else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
-		adsize = sizeof(long_ad);
+		adsize = sizeof(struct long_ad);
 	else
 		return 0;
 
@@ -707,7 +702,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
 			alloc_count = block_count;
 			eloc.logicalBlockNum += alloc_count;
 			elen -= (alloc_count << sb->s_blocksize_bits);
-			udf_write_aext(table, &epos, eloc,
+			udf_write_aext(table, &epos, &eloc,
 					(etype << 30) | elen, 1);
 		} else
 			udf_delete_aext(table, epos, eloc,
@@ -718,10 +713,8 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
 
 	brelse(epos.bh);
 
-	if (alloc_count && udf_add_free_space(sbi, partition, -alloc_count)) {
-		mark_buffer_dirty(sbi->s_lvid_bh);
-		sb->s_dirt = 1;
-	}
+	if (alloc_count)
+		udf_add_free_space(sb, partition, -alloc_count);
 	mutex_unlock(&sbi->s_alloc_mutex);
 	return alloc_count;
 }
@@ -735,7 +728,7 @@ static int udf_table_new_block(struct super_block *sb,
 	uint32_t spread = 0xFFFFFFFF, nspread = 0xFFFFFFFF;
 	uint32_t newblock = 0, adsize;
 	uint32_t elen, goal_elen = 0;
-	kernel_lb_addr eloc, uninitialized_var(goal_eloc);
+	struct kernel_lb_addr eloc, uninitialized_var(goal_eloc);
 	struct extent_position epos, goal_epos;
 	int8_t etype;
 	struct udf_inode_info *iinfo = UDF_I(table);
@@ -743,9 +736,9 @@ static int udf_table_new_block(struct super_block *sb,
 	*err = -ENOSPC;
 
 	if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
-		adsize = sizeof(short_ad);
+		adsize = sizeof(struct short_ad);
 	else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
-		adsize = sizeof(long_ad);
+		adsize = sizeof(struct long_ad);
 	else
 		return newblock;
 
@@ -814,46 +807,37 @@ static int udf_table_new_block(struct super_block *sb,
 	}
 
 	if (goal_elen)
-		udf_write_aext(table, &goal_epos, goal_eloc, goal_elen, 1);
+		udf_write_aext(table, &goal_epos, &goal_eloc, goal_elen, 1);
 	else
 		udf_delete_aext(table, goal_epos, goal_eloc, goal_elen);
 	brelse(goal_epos.bh);
 
-	if (udf_add_free_space(sbi, partition, -1))
-		mark_buffer_dirty(sbi->s_lvid_bh);
+	udf_add_free_space(sb, partition, -1);
 
-	sb->s_dirt = 1;
 	mutex_unlock(&sbi->s_alloc_mutex);
 	*err = 0;
 	return newblock;
 }
 
-inline void udf_free_blocks(struct super_block *sb,
-			    struct inode *inode,
-			    kernel_lb_addr bloc, uint32_t offset,
-			    uint32_t count)
+void udf_free_blocks(struct super_block *sb, struct inode *inode,
+		     struct kernel_lb_addr *bloc, uint32_t offset,
+		     uint32_t count)
 {
-	uint16_t partition = bloc.partitionReferenceNum;
+	uint16_t partition = bloc->partitionReferenceNum;
 	struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition];
 
 	if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) {
-		return udf_bitmap_free_blocks(sb, inode,
-					      map->s_uspace.s_bitmap,
-					      bloc, offset, count);
+		udf_bitmap_free_blocks(sb, inode, map->s_uspace.s_bitmap,
+				       bloc, offset, count);
 	} else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) {
-		return udf_table_free_blocks(sb, inode,
-					     map->s_uspace.s_table,
-					     bloc, offset, count);
+		udf_table_free_blocks(sb, inode, map->s_uspace.s_table,
+				      bloc, offset, count);
 	} else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) {
-		return udf_bitmap_free_blocks(sb, inode,
-					      map->s_fspace.s_bitmap,
-					      bloc, offset, count);
+		udf_bitmap_free_blocks(sb, inode, map->s_fspace.s_bitmap,
+				       bloc, offset, count);
 	} else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) {
-		return udf_table_free_blocks(sb, inode,
-					     map->s_fspace.s_table,
-					     bloc, offset, count);
-	} else {
-		return;
+		udf_table_free_blocks(sb, inode, map->s_fspace.s_table,
+				      bloc, offset, count);
 	}
 }
 
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 62dc270..2efd4d5 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -51,7 +51,7 @@ static int do_udf_readdir(struct inode *dir, struct file *filp,
 	uint8_t lfi;
 	loff_t size = udf_ext0_offset(dir) + dir->i_size;
 	struct buffer_head *tmp, *bha[16];
-	kernel_lb_addr eloc;
+	struct kernel_lb_addr eloc;
 	uint32_t elen;
 	sector_t offset;
 	int i, num, ret = 0;
@@ -80,13 +80,13 @@ static int do_udf_readdir(struct inode *dir, struct file *filp,
 			ret = -ENOENT;
 			goto out;
 		}
-		block = udf_get_lb_pblock(dir->i_sb, eloc, offset);
+		block = udf_get_lb_pblock(dir->i_sb, &eloc, offset);
 		if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
 			if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
-				epos.offset -= sizeof(short_ad);
+				epos.offset -= sizeof(struct short_ad);
 			else if (iinfo->i_alloc_type ==
 					ICBTAG_FLAG_AD_LONG)
-				epos.offset -= sizeof(long_ad);
+				epos.offset -= sizeof(struct long_ad);
 		} else {
 			offset = 0;
 		}
@@ -101,7 +101,7 @@ static int do_udf_readdir(struct inode *dir, struct file *filp,
 			if (i + offset > (elen >> dir->i_sb->s_blocksize_bits))
 				i = (elen >> dir->i_sb->s_blocksize_bits) - offset;
 			for (num = 0; i > 0; i--) {
-				block = udf_get_lb_pblock(dir->i_sb, eloc, offset + i);
+				block = udf_get_lb_pblock(dir->i_sb, &eloc, offset + i);
 				tmp = udf_tgetblk(dir->i_sb, block);
 				if (tmp && !buffer_uptodate(tmp) && !buffer_locked(tmp))
 					bha[num++] = tmp;
@@ -161,9 +161,9 @@ static int do_udf_readdir(struct inode *dir, struct file *filp,
 			memcpy(fname, "..", flen);
 			dt_type = DT_DIR;
 		} else {
-			kernel_lb_addr tloc = lelb_to_cpu(cfi.icb.extLocation);
+			struct kernel_lb_addr tloc = lelb_to_cpu(cfi.icb.extLocation);
 
-			iblock = udf_get_lb_pblock(dir->i_sb, tloc, 0);
+			iblock = udf_get_lb_pblock(dir->i_sb, &tloc, 0);
 			flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi);
 			dt_type = DT_UNKNOWN;
 		}
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 2820f8f..1d2c570 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -20,7 +20,7 @@
 
 #if 0
 static uint8_t *udf_filead_read(struct inode *dir, uint8_t *tmpad,
-				uint8_t ad_size, kernel_lb_addr fe_loc,
+				uint8_t ad_size, struct kernel_lb_addr fe_loc,
 				int *pos, int *offset, struct buffer_head **bh,
 				int *error)
 {
@@ -75,7 +75,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
 					 struct udf_fileident_bh *fibh,
 					 struct fileIdentDesc *cfi,
 					 struct extent_position *epos,
-					 kernel_lb_addr *eloc, uint32_t *elen,
+					 struct kernel_lb_addr *eloc, uint32_t *elen,
 					 sector_t *offset)
 {
 	struct fileIdentDesc *fi;
@@ -111,7 +111,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
 		    (EXT_RECORDED_ALLOCATED >> 30))
 			return NULL;
 
-		block = udf_get_lb_pblock(dir->i_sb, *eloc, *offset);
+		block = udf_get_lb_pblock(dir->i_sb, eloc, *offset);
 
 		(*offset)++;
 
@@ -131,7 +131,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
 			if (i + *offset > (*elen >> blocksize_bits))
 				i = (*elen >> blocksize_bits)-*offset;
 			for (num = 0; i > 0; i--) {
-				block = udf_get_lb_pblock(dir->i_sb, *eloc,
+				block = udf_get_lb_pblock(dir->i_sb, eloc,
 							  *offset + i);
 				tmp = udf_tgetblk(dir->i_sb, block);
 				if (tmp && !buffer_uptodate(tmp) &&
@@ -169,7 +169,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
 		    (EXT_RECORDED_ALLOCATED >> 30))
 			return NULL;
 
-		block = udf_get_lb_pblock(dir->i_sb, *eloc, *offset);
+		block = udf_get_lb_pblock(dir->i_sb, eloc, *offset);
 
 		(*offset)++;
 
@@ -249,9 +249,9 @@ struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, int *offset)
 }
 
 #if 0
-static extent_ad *udf_get_fileextent(void *buffer, int bufsize, int *offset)
+static struct extent_ad *udf_get_fileextent(void *buffer, int bufsize, int *offset)
 {
-	extent_ad *ext;
+	struct extent_ad *ext;
 	struct fileEntry *fe;
 	uint8_t *ptr;
 
@@ -274,54 +274,54 @@ static extent_ad *udf_get_fileextent(void *buffer, int bufsize, int *offset)
 	if ((*offset > 0) && (*offset < le32_to_cpu(fe->lengthAllocDescs)))
 		ptr += *offset;
 
-	ext = (extent_ad *)ptr;
+	ext = (struct extent_ad *)ptr;
 
-	*offset = *offset + sizeof(extent_ad);
+	*offset = *offset + sizeof(struct extent_ad);
 	return ext;
 }
 #endif
 
-short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offset,
+struct short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offset,
 			      int inc)
 {
-	short_ad *sa;
+	struct short_ad *sa;
 
 	if ((!ptr) || (!offset)) {
 		printk(KERN_ERR "udf: udf_get_fileshortad() invalidparms\n");
 		return NULL;
 	}
 
-	if ((*offset + sizeof(short_ad)) > maxoffset)
+	if ((*offset + sizeof(struct short_ad)) > maxoffset)
 		return NULL;
 	else {
-		sa = (short_ad *)ptr;
+		sa = (struct short_ad *)ptr;
 		if (sa->extLength == 0)
 			return NULL;
 	}
 
 	if (inc)
-		*offset += sizeof(short_ad);
+		*offset += sizeof(struct short_ad);
 	return sa;
 }
 
-long_ad *udf_get_filelongad(uint8_t *ptr, int maxoffset, uint32_t *offset, int inc)
+struct long_ad *udf_get_filelongad(uint8_t *ptr, int maxoffset, uint32_t *offset, int inc)
 {
-	long_ad *la;
+	struct long_ad *la;
 
 	if ((!ptr) || (!offset)) {
 		printk(KERN_ERR "udf: udf_get_filelongad() invalidparms\n");
 		return NULL;
 	}
 
-	if ((*offset + sizeof(long_ad)) > maxoffset)
+	if ((*offset + sizeof(struct long_ad)) > maxoffset)
 		return NULL;
 	else {
-		la = (long_ad *)ptr;
+		la = (struct long_ad *)ptr;
 		if (la->extLength == 0)
 			return NULL;
 	}
 
 	if (inc)
-		*offset += sizeof(long_ad);
+		*offset += sizeof(struct long_ad);
 	return la;
 }
diff --git a/fs/udf/ecma_167.h b/fs/udf/ecma_167.h
index a0974df..4792b77 100644
--- a/fs/udf/ecma_167.h
+++ b/fs/udf/ecma_167.h
@@ -38,10 +38,10 @@
 #define _ECMA_167_H 1
 
 /* Character set specification (ECMA 167r3 1/7.2.1) */
-typedef struct {
+struct charspec {
 	uint8_t		charSetType;
 	uint8_t		charSetInfo[63];
-} __attribute__ ((packed)) charspec;
+} __attribute__ ((packed));
 
 /* Character Set Type (ECMA 167r3 1/7.2.1.1) */
 #define CHARSPEC_TYPE_CS0		0x00	/* (1/7.2.2) */
@@ -57,7 +57,7 @@ typedef struct {
 typedef uint8_t		dstring;
 
 /* Timestamp (ECMA 167r3 1/7.3) */
-typedef struct {
+struct timestamp {
 	__le16		typeAndTimezone;
 	__le16		year;
 	uint8_t		month;
@@ -68,7 +68,7 @@ typedef struct {
 	uint8_t		centiseconds;
 	uint8_t		hundredsOfMicroseconds;
 	uint8_t		microseconds;
-} __attribute__ ((packed)) timestamp;
+} __attribute__ ((packed));
 
 /* Type and Time Zone (ECMA 167r3 1/7.3.1) */
 #define TIMESTAMP_TYPE_MASK		0xF000
@@ -78,11 +78,11 @@ typedef struct {
 #define TIMESTAMP_TIMEZONE_MASK		0x0FFF
 
 /* Entity identifier (ECMA 167r3 1/7.4) */
-typedef struct {
+struct regid {
 	uint8_t		flags;
 	uint8_t		ident[23];
 	uint8_t		identSuffix[8];
-} __attribute__ ((packed)) regid;
+} __attribute__ ((packed));
 
 /* Flags (ECMA 167r3 1/7.4.1) */
 #define ENTITYID_FLAGS_DIRTY		0x00
@@ -126,38 +126,38 @@ struct terminatingExtendedAreaDesc {
 
 /* Boot Descriptor (ECMA 167r3 2/9.4) */
 struct bootDesc {
-	uint8_t		structType;
-	uint8_t		stdIdent[VSD_STD_ID_LEN];
-	uint8_t		structVersion;
-	uint8_t		reserved1;
-	regid		archType;
-	regid		bootIdent;
-	__le32		bootExtLocation;
-	__le32		bootExtLength;
-	__le64		loadAddress;
-	__le64		startAddress;
-	timestamp	descCreationDateAndTime;
-	__le16		flags;
-	uint8_t		reserved2[32];
-	uint8_t		bootUse[1906];
+	uint8_t			structType;
+	uint8_t			stdIdent[VSD_STD_ID_LEN];
+	uint8_t			structVersion;
+	uint8_t			reserved1;
+	struct regid		archType;
+	struct regid		bootIdent;
+	__le32			bootExtLocation;
+	__le32			bootExtLength;
+	__le64			loadAddress;
+	__le64			startAddress;
+	struct timestamp	descCreationDateAndTime;
+	__le16			flags;
+	uint8_t			reserved2[32];
+	uint8_t			bootUse[1906];
 } __attribute__ ((packed));
 
 /* Flags (ECMA 167r3 2/9.4.12) */
 #define BOOT_FLAGS_ERASE		0x01
 
 /* Extent Descriptor (ECMA 167r3 3/7.1) */
-typedef struct {
+struct extent_ad {
 	__le32		extLength;
 	__le32		extLocation;
-} __attribute__ ((packed)) extent_ad;
+} __attribute__ ((packed));
 
-typedef struct {
+struct kernel_extent_ad {
 	uint32_t	extLength;
 	uint32_t	extLocation;
-} kernel_extent_ad;
+};
 
 /* Descriptor Tag (ECMA 167r3 3/7.2) */
-typedef struct {
+struct tag {
 	__le16		tagIdent;
 	__le16		descVersion;
 	uint8_t		tagChecksum;
@@ -166,7 +166,7 @@ typedef struct {
 	__le16		descCRC;
 	__le16		descCRCLength;
 	__le32		tagLocation;
-} __attribute__ ((packed)) tag;
+} __attribute__ ((packed));
 
 /* Tag Identifier (ECMA 167r3 3/7.2.1) */
 #define TAG_IDENT_PVD			0x0001
@@ -190,28 +190,28 @@ struct NSRDesc {
 
 /* Primary Volume Descriptor (ECMA 167r3 3/10.1) */
 struct primaryVolDesc {
-	tag		descTag;
-	__le32		volDescSeqNum;
-	__le32		primaryVolDescNum;
-	dstring		volIdent[32];
-	__le16		volSeqNum;
-	__le16		maxVolSeqNum;
-	__le16		interchangeLvl;
-	__le16		maxInterchangeLvl;
-	__le32		charSetList;
-	__le32		maxCharSetList;
-	dstring		volSetIdent[128];
-	charspec	descCharSet;
-	charspec	explanatoryCharSet;
-	extent_ad	volAbstract;
-	extent_ad	volCopyright;
-	regid		appIdent;
-	timestamp	recordingDateAndTime;
-	regid		impIdent;
-	uint8_t		impUse[64];
-	__le32		predecessorVolDescSeqLocation;
-	__le16		flags;
-	uint8_t		reserved[22];
+	struct tag		descTag;
+	__le32			volDescSeqNum;
+	__le32			primaryVolDescNum;
+	dstring			volIdent[32];
+	__le16			volSeqNum;
+	__le16			maxVolSeqNum;
+	__le16			interchangeLvl;
+	__le16			maxInterchangeLvl;
+	__le32			charSetList;
+	__le32			maxCharSetList;
+	dstring			volSetIdent[128];
+	struct charspec		descCharSet;
+	struct charspec		explanatoryCharSet;
+	struct extent_ad	volAbstract;
+	struct extent_ad	volCopyright;
+	struct regid		appIdent;
+	struct timestamp	recordingDateAndTime;
+	struct regid		impIdent;
+	uint8_t			impUse[64];
+	__le32			predecessorVolDescSeqLocation;
+	__le16			flags;
+	uint8_t			reserved[22];
 } __attribute__ ((packed));
 
 /* Flags (ECMA 167r3 3/10.1.21) */
@@ -219,40 +219,40 @@ struct primaryVolDesc {
 
 /* Anchor Volume Descriptor Pointer (ECMA 167r3 3/10.2) */
 struct anchorVolDescPtr {
-	tag		descTag;
-	extent_ad	mainVolDescSeqExt;
-	extent_ad	reserveVolDescSeqExt;
-	uint8_t	 	reserved[480];
+	struct tag		descTag;
+	struct extent_ad	mainVolDescSeqExt;
+	struct extent_ad	reserveVolDescSeqExt;
+	uint8_t	 		reserved[480];
 } __attribute__ ((packed));
 
 /* Volume Descriptor Pointer (ECMA 167r3 3/10.3) */
 struct volDescPtr {
-	tag		descTag;
-	__le32		volDescSeqNum;
-	extent_ad	nextVolDescSeqExt;
-	uint8_t		reserved[484];
+	struct tag		descTag;
+	__le32			volDescSeqNum;
+	struct extent_ad	nextVolDescSeqExt;
+	uint8_t			reserved[484];
 } __attribute__ ((packed));
 
 /* Implementation Use Volume Descriptor (ECMA 167r3 3/10.4) */
 struct impUseVolDesc {
-	tag		descTag;
+	struct tag	descTag;
 	__le32		volDescSeqNum;
-	regid		impIdent;
+	struct regid	impIdent;
 	uint8_t		impUse[460];
 } __attribute__ ((packed));
 
 /* Partition Descriptor (ECMA 167r3 3/10.5) */
 struct partitionDesc {
-	tag descTag;
+	struct tag descTag;
 	__le32 volDescSeqNum;
 	__le16 partitionFlags;
 	__le16 partitionNumber;
-	regid partitionContents;
+	struct regid partitionContents;
 	uint8_t partitionContentsUse[128];
 	__le32 accessType;
 	__le32 partitionStartingLocation;
 	__le32 partitionLength;
-	regid impIdent;
+	struct regid impIdent;
 	uint8_t impUse[128];
 	uint8_t reserved[156];
 } __attribute__ ((packed));
@@ -278,19 +278,19 @@ struct partitionDesc {
 
 /* Logical Volume Descriptor (ECMA 167r3 3/10.6) */
 struct logicalVolDesc {
-	tag		descTag;
-	__le32		volDescSeqNum;
-	charspec	descCharSet;
-	dstring		logicalVolIdent[128];
-	__le32		logicalBlockSize;
-	regid		domainIdent;
-	uint8_t		logicalVolContentsUse[16];
-	__le32		mapTableLength;
-	__le32		numPartitionMaps;
-	regid		impIdent;
-	uint8_t		impUse[128];
-	extent_ad	integritySeqExt;
-	uint8_t		partitionMaps[0];
+	struct tag		descTag;
+	__le32			volDescSeqNum;
+	struct charspec		descCharSet;
+	dstring			logicalVolIdent[128];
+	__le32			logicalBlockSize;
+	struct regid		domainIdent;
+	uint8_t			logicalVolContentsUse[16];
+	__le32			mapTableLength;
+	__le32			numPartitionMaps;
+	struct regid		impIdent;
+	uint8_t			impUse[128];
+	struct extent_ad	integritySeqExt;
+	uint8_t			partitionMaps[0];
 } __attribute__ ((packed));
 
 /* Generic Partition Map (ECMA 167r3 3/10.7.1) */
@@ -322,30 +322,30 @@ struct genericPartitionMap2 {
 
 /* Unallocated Space Descriptor (ECMA 167r3 3/10.8) */
 struct unallocSpaceDesc {
-	tag		descTag;
-	__le32		volDescSeqNum;
-	__le32		numAllocDescs;
-	extent_ad	allocDescs[0];
+	struct tag		descTag;
+	__le32			volDescSeqNum;
+	__le32			numAllocDescs;
+	struct extent_ad	allocDescs[0];
 } __attribute__ ((packed));
 
 /* Terminating Descriptor (ECMA 167r3 3/10.9) */
 struct terminatingDesc {
-	tag		descTag;
+	struct tag	descTag;
 	uint8_t		reserved[496];
 } __attribute__ ((packed));
 
 /* Logical Volume Integrity Descriptor (ECMA 167r3 3/10.10) */
 struct logicalVolIntegrityDesc {
-	tag		descTag;
-	timestamp	recordingDateAndTime;
-	__le32		integrityType;
-	extent_ad	nextIntegrityExt;
-	uint8_t		logicalVolContentsUse[32];
-	__le32		numOfPartitions;
-	__le32		lengthOfImpUse;
-	__le32		freeSpaceTable[0];
-	__le32		sizeTable[0];
-	uint8_t		impUse[0];
+	struct tag		descTag;
+	struct timestamp	recordingDateAndTime;
+	__le32			integrityType;
+	struct extent_ad	nextIntegrityExt;
+	uint8_t			logicalVolContentsUse[32];
+	__le32			numOfPartitions;
+	__le32			lengthOfImpUse;
+	__le32			freeSpaceTable[0];
+	__le32			sizeTable[0];
+	uint8_t			impUse[0];
 } __attribute__ ((packed));
 
 /* Integrity Type (ECMA 167r3 3/10.10.3) */
@@ -353,50 +353,50 @@ struct logicalVolIntegrityDesc {
 #define LVID_INTEGRITY_TYPE_CLOSE	0x00000001
 
 /* Recorded Address (ECMA 167r3 4/7.1) */
-typedef struct {
+struct lb_addr {
 	__le32		logicalBlockNum;
 	__le16	 	partitionReferenceNum;
-} __attribute__ ((packed)) lb_addr;
+} __attribute__ ((packed));
 
 /* ... and its in-core analog */
-typedef struct {
+struct kernel_lb_addr {
 	uint32_t		logicalBlockNum;
 	uint16_t	 	partitionReferenceNum;
-} kernel_lb_addr;
+};
 
 /* Short Allocation Descriptor (ECMA 167r3 4/14.14.1) */
-typedef struct {
+struct short_ad {
         __le32		extLength;
         __le32		extPosition;
-} __attribute__ ((packed)) short_ad;
+} __attribute__ ((packed));
 
 /* Long Allocation Descriptor (ECMA 167r3 4/14.14.2) */
-typedef struct {
+struct long_ad {
 	__le32		extLength;
-	lb_addr		extLocation;
+	struct lb_addr	extLocation;
 	uint8_t		impUse[6];
-} __attribute__ ((packed)) long_ad;
+} __attribute__ ((packed));
 
-typedef struct {
-	uint32_t	extLength;
-	kernel_lb_addr	extLocation;
-	uint8_t		impUse[6];
-} kernel_long_ad;
+struct kernel_long_ad {
+	uint32_t		extLength;
+	struct kernel_lb_addr	extLocation;
+	uint8_t			impUse[6];
+};
 
 /* Extended Allocation Descriptor (ECMA 167r3 4/14.14.3) */
-typedef struct {
+struct ext_ad {
 	__le32		extLength;
 	__le32		recordedLength;
 	__le32		informationLength;
-	lb_addr		extLocation;
-} __attribute__ ((packed)) ext_ad;
+	struct lb_addr	extLocation;
+} __attribute__ ((packed));
 
-typedef struct {
-	uint32_t	extLength;
-	uint32_t	recordedLength;
-	uint32_t	informationLength;
-	kernel_lb_addr	extLocation;
-} kernel_ext_ad;
+struct kernel_ext_ad {
+	uint32_t		extLength;
+	uint32_t		recordedLength;
+	uint32_t		informationLength;
+	struct kernel_lb_addr	extLocation;
+};
 
 /* Descriptor Tag (ECMA 167r3 4/7.2 - See 3/7.2) */
 
@@ -415,44 +415,44 @@ typedef struct {
 
 /* File Set Descriptor (ECMA 167r3 4/14.1) */
 struct fileSetDesc {
-	tag		descTag;
-	timestamp	recordingDateAndTime;
-	__le16		interchangeLvl;
-	__le16		maxInterchangeLvl;
-	__le32		charSetList;
-	__le32		maxCharSetList;
-	__le32		fileSetNum;
-	__le32		fileSetDescNum;
-	charspec	logicalVolIdentCharSet;
-	dstring		logicalVolIdent[128];
-	charspec	fileSetCharSet;
-	dstring		fileSetIdent[32];
-	dstring		copyrightFileIdent[32];
-	dstring		abstractFileIdent[32];
-	long_ad		rootDirectoryICB;
-	regid		domainIdent;
-	long_ad		nextExt;
-	long_ad		streamDirectoryICB;
-	uint8_t		reserved[32];
+	struct tag		descTag;
+	struct timestamp	recordingDateAndTime;
+	__le16			interchangeLvl;
+	__le16			maxInterchangeLvl;
+	__le32			charSetList;
+	__le32			maxCharSetList;
+	__le32			fileSetNum;
+	__le32			fileSetDescNum;
+	struct charspec		logicalVolIdentCharSet;
+	dstring			logicalVolIdent[128];
+	struct charspec		fileSetCharSet;
+	dstring			fileSetIdent[32];
+	dstring			copyrightFileIdent[32];
+	dstring			abstractFileIdent[32];
+	struct long_ad		rootDirectoryICB;
+	struct regid		domainIdent;
+	struct long_ad		nextExt;
+	struct long_ad		streamDirectoryICB;
+	uint8_t			reserved[32];
 } __attribute__ ((packed));
 
 /* Partition Header Descriptor (ECMA 167r3 4/14.3) */
 struct partitionHeaderDesc {
-	short_ad	unallocSpaceTable;
-	short_ad	unallocSpaceBitmap;
-	short_ad	partitionIntegrityTable;
-	short_ad	freedSpaceTable;
-	short_ad	freedSpaceBitmap;
+	struct short_ad	unallocSpaceTable;
+	struct short_ad	unallocSpaceBitmap;
+	struct short_ad	partitionIntegrityTable;
+	struct short_ad	freedSpaceTable;
+	struct short_ad	freedSpaceBitmap;
 	uint8_t		reserved[88];
 } __attribute__ ((packed));
 
 /* File Identifier Descriptor (ECMA 167r3 4/14.4) */
 struct fileIdentDesc {
-	tag		descTag;
+	struct tag	descTag;
 	__le16		fileVersionNum;
 	uint8_t		fileCharacteristics;
 	uint8_t		lengthFileIdent;
-	long_ad		icb;
+	struct long_ad	icb;
 	__le16		lengthOfImpUse;
 	uint8_t		impUse[0];
 	uint8_t		fileIdent[0];
@@ -468,22 +468,22 @@ struct fileIdentDesc {
 
 /* Allocation Ext Descriptor (ECMA 167r3 4/14.5) */
 struct allocExtDesc {
-	tag		descTag;
+	struct tag	descTag;
 	__le32		previousAllocExtLocation;
 	__le32		lengthAllocDescs;
 } __attribute__ ((packed));
 
 /* ICB Tag (ECMA 167r3 4/14.6) */
-typedef struct {
+struct icbtag {
 	__le32		priorRecordedNumDirectEntries;
 	__le16		strategyType;
 	__le16		strategyParameter;
 	__le16		numEntries;
 	uint8_t		reserved;
 	uint8_t		fileType;
-	lb_addr		parentICBLocation;
+	struct lb_addr	parentICBLocation;
 	__le16		flags;
-} __attribute__ ((packed)) icbtag;
+} __attribute__ ((packed));
 
 /* Strategy Type (ECMA 167r3 4/14.6.2) */
 #define ICBTAG_STRATEGY_TYPE_UNDEF	0x0000
@@ -528,41 +528,41 @@ typedef struct {
 
 /* Indirect Entry (ECMA 167r3 4/14.7) */
 struct indirectEntry {
-	tag		descTag;
-	icbtag		icbTag;
-	long_ad		indirectICB;
+	struct tag	descTag;
+	struct icbtag	icbTag;
+	struct long_ad	indirectICB;
 } __attribute__ ((packed));
 
 /* Terminal Entry (ECMA 167r3 4/14.8) */
 struct terminalEntry {
-	tag		descTag;
-	icbtag		icbTag;
+	struct tag	descTag;
+	struct icbtag	icbTag;
 } __attribute__ ((packed));
 
 /* File Entry (ECMA 167r3 4/14.9) */
 struct fileEntry {
-	tag		descTag;
-	icbtag		icbTag;
-	__le32		uid;
-	__le32		gid;
-	__le32		permissions;
-	__le16		fileLinkCount;
-	uint8_t		recordFormat;
-	uint8_t		recordDisplayAttr;
-	__le32		recordLength;
-	__le64		informationLength;
-	__le64		logicalBlocksRecorded;
-	timestamp	accessTime;
-	timestamp	modificationTime;
-	timestamp	attrTime;
-	__le32		checkpoint;
-	long_ad		extendedAttrICB;
-	regid		impIdent;
-	__le64		uniqueID;
-	__le32		lengthExtendedAttr;
-	__le32		lengthAllocDescs;
-	uint8_t		extendedAttr[0];
-	uint8_t		allocDescs[0];
+	struct tag		descTag;
+	struct icbtag		icbTag;
+	__le32			uid;
+	__le32			gid;
+	__le32			permissions;
+	__le16			fileLinkCount;
+	uint8_t			recordFormat;
+	uint8_t			recordDisplayAttr;
+	__le32			recordLength;
+	__le64			informationLength;
+	__le64			logicalBlocksRecorded;
+	struct timestamp	accessTime;
+	struct timestamp	modificationTime;
+	struct timestamp	attrTime;
+	__le32			checkpoint;
+	struct long_ad		extendedAttrICB;
+	struct regid		impIdent;
+	__le64			uniqueID;
+	__le32			lengthExtendedAttr;
+	__le32			lengthAllocDescs;
+	uint8_t			extendedAttr[0];
+	uint8_t			allocDescs[0];
 } __attribute__ ((packed));
 
 /* Permissions (ECMA 167r3 4/14.9.5) */
@@ -604,7 +604,7 @@ struct fileEntry {
 
 /* Extended Attribute Header Descriptor (ECMA 167r3 4/14.10.1) */
 struct extendedAttrHeaderDesc {
-	tag		descTag;
+	struct tag	descTag;
 	__le32		impAttrLocation;
 	__le32		appAttrLocation;
 } __attribute__ ((packed));
@@ -687,7 +687,7 @@ struct impUseExtAttr {
 	uint8_t		reserved[3];
 	__le32		attrLength;
 	__le32		impUseLength;
-	regid		impIdent;
+	struct regid	impIdent;
 	uint8_t		impUse[0];
 } __attribute__ ((packed));
 
@@ -698,7 +698,7 @@ struct appUseExtAttr {
 	uint8_t		reserved[3];
 	__le32		attrLength;
 	__le32		appUseLength;
-	regid		appIdent;
+	struct regid	appIdent;
 	uint8_t		appUse[0];
 } __attribute__ ((packed));
 
@@ -712,15 +712,15 @@ struct appUseExtAttr {
 
 /* Unallocated Space Entry (ECMA 167r3 4/14.11) */
 struct unallocSpaceEntry {
-	tag		descTag;
-	icbtag		icbTag;
+	struct tag	descTag;
+	struct icbtag	icbTag;
 	__le32		lengthAllocDescs;
 	uint8_t		allocDescs[0];
 } __attribute__ ((packed));
 
 /* Space Bitmap Descriptor (ECMA 167r3 4/14.12) */
 struct spaceBitmapDesc {
-	tag		descTag;
+	struct tag	descTag;
 	__le32		numOfBits;
 	__le32		numOfBytes;
 	uint8_t		bitmap[0];
@@ -728,13 +728,13 @@ struct spaceBitmapDesc {
 
 /* Partition Integrity Entry (ECMA 167r3 4/14.13) */
 struct partitionIntegrityEntry {
-	tag		descTag;
-	icbtag		icbTag;
-	timestamp	recordingDateAndTime;
-	uint8_t		integrityType;
-	uint8_t		reserved[175];
-	regid		impIdent;
-	uint8_t		impUse[256];
+	struct tag		descTag;
+	struct icbtag		icbTag;
+	struct timestamp	recordingDateAndTime;
+	uint8_t			integrityType;
+	uint8_t			reserved[175];
+	struct regid		impIdent;
+	uint8_t			impUse[256];
 } __attribute__ ((packed));
 
 /* Short Allocation Descriptor (ECMA 167r3 4/14.14.1) */
@@ -765,32 +765,32 @@ struct pathComponent {
 
 /* File Entry (ECMA 167r3 4/14.17) */
 struct extendedFileEntry {
-	tag		descTag;
-	icbtag		icbTag;
-	__le32		uid;
-	__le32		gid;
-	__le32		permissions;
-	__le16		fileLinkCount;
-	uint8_t		recordFormat;
-	uint8_t		recordDisplayAttr;
-	__le32		recordLength;
-	__le64		informationLength;
-	__le64		objectSize;
-	__le64		logicalBlocksRecorded;
-	timestamp	accessTime;
-	timestamp	modificationTime;
-	timestamp	createTime;
-	timestamp	attrTime;
-	__le32		checkpoint;
-	__le32		reserved;
-	long_ad		extendedAttrICB;
-	long_ad		streamDirectoryICB;
-	regid		impIdent;
-	__le64		uniqueID;
-	__le32		lengthExtendedAttr;
-	__le32		lengthAllocDescs;
-	uint8_t		extendedAttr[0];
-	uint8_t		allocDescs[0];
+	struct tag		descTag;
+	struct icbtag		icbTag;
+	__le32			uid;
+	__le32			gid;
+	__le32			permissions;
+	__le16			fileLinkCount;
+	uint8_t			recordFormat;
+	uint8_t			recordDisplayAttr;
+	__le32			recordLength;
+	__le64			informationLength;
+	__le64			objectSize;
+	__le64			logicalBlocksRecorded;
+	struct timestamp	accessTime;
+	struct timestamp	modificationTime;
+	struct timestamp	createTime;
+	struct timestamp	attrTime;
+	__le32			checkpoint;
+	__le32			reserved;
+	struct long_ad		extendedAttrICB;
+	struct long_ad		streamDirectoryICB;
+	struct regid		impIdent;
+	__le64			uniqueID;
+	__le32			lengthExtendedAttr;
+	__le32			lengthAllocDescs;
+	uint8_t			extendedAttr[0];
+	uint8_t			allocDescs[0];
 } __attribute__ ((packed));
 
 #endif /* _ECMA_167_H */
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 47dbe56..c10fa39 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -49,12 +49,11 @@ void udf_free_inode(struct inode *inode)
 			le32_add_cpu(&lvidiu->numDirs, -1);
 		else
 			le32_add_cpu(&lvidiu->numFiles, -1);
-
-		mark_buffer_dirty(sbi->s_lvid_bh);
+		udf_updated_lvid(sb);
 	}
 	mutex_unlock(&sbi->s_alloc_mutex);
 
-	udf_free_blocks(sb, NULL, UDF_I(inode)->i_location, 0, 1);
+	udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1);
 }
 
 struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
@@ -122,7 +121,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
 		if (!(++uniqueID & 0x00000000FFFFFFFFUL))
 			uniqueID += 16;
 		lvhd->uniqueID = cpu_to_le64(uniqueID);
-		mark_buffer_dirty(sbi->s_lvid_bh);
+		udf_updated_lvid(sb);
 	}
 	mutex_unlock(&sbi->s_alloc_mutex);
 	inode->i_mode = mode;
@@ -138,7 +137,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
 	iinfo->i_location.logicalBlockNum = block;
 	iinfo->i_location.partitionReferenceNum =
 				dinfo->i_location.partitionReferenceNum;
-	inode->i_ino = udf_get_lb_pblock(sb, iinfo->i_location, 0);
+	inode->i_ino = udf_get_lb_pblock(sb, &iinfo->i_location, 0);
 	inode->i_blocks = 0;
 	iinfo->i_lenEAttr = 0;
 	iinfo->i_lenAlloc = 0;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 30ebde4..e7533f7 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -55,15 +55,15 @@ static int udf_alloc_i_data(struct inode *inode, size_t size);
 static struct buffer_head *inode_getblk(struct inode *, sector_t, int *,
 					sector_t *, int *);
 static int8_t udf_insert_aext(struct inode *, struct extent_position,
-			      kernel_lb_addr, uint32_t);
+			      struct kernel_lb_addr, uint32_t);
 static void udf_split_extents(struct inode *, int *, int, int,
-			      kernel_long_ad[EXTENT_MERGE_SIZE], int *);
+			      struct kernel_long_ad[EXTENT_MERGE_SIZE], int *);
 static void udf_prealloc_extents(struct inode *, int, int,
-				 kernel_long_ad[EXTENT_MERGE_SIZE], int *);
+				 struct kernel_long_ad[EXTENT_MERGE_SIZE], int *);
 static void udf_merge_extents(struct inode *,
-			      kernel_long_ad[EXTENT_MERGE_SIZE], int *);
+			      struct kernel_long_ad[EXTENT_MERGE_SIZE], int *);
 static void udf_update_extents(struct inode *,
-			       kernel_long_ad[EXTENT_MERGE_SIZE], int, int,
+			       struct kernel_long_ad[EXTENT_MERGE_SIZE], int, int,
 			       struct extent_position *);
 static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
 
@@ -200,7 +200,7 @@ struct buffer_head *udf_expand_dir_adinicb(struct inode *inode, int *block,
 {
 	int newblock;
 	struct buffer_head *dbh = NULL;
-	kernel_lb_addr eloc;
+	struct kernel_lb_addr eloc;
 	uint32_t elen;
 	uint8_t alloctype;
 	struct extent_position epos;
@@ -281,7 +281,7 @@ struct buffer_head *udf_expand_dir_adinicb(struct inode *inode, int *block,
 	epos.bh = NULL;
 	epos.block = iinfo->i_location;
 	epos.offset = udf_file_entry_alloc_offset(inode);
-	udf_add_aext(inode, &epos, eloc, elen, 0);
+	udf_add_aext(inode, &epos, &eloc, elen, 0);
 	/* UniqueID stuff */
 
 	brelse(epos.bh);
@@ -359,12 +359,12 @@ static struct buffer_head *udf_getblk(struct inode *inode, long block,
 
 /* Extend the file by 'blocks' blocks, return the number of extents added */
 int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
-		    kernel_long_ad *last_ext, sector_t blocks)
+		    struct kernel_long_ad *last_ext, sector_t blocks)
 {
 	sector_t add;
 	int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
 	struct super_block *sb = inode->i_sb;
-	kernel_lb_addr prealloc_loc = {};
+	struct kernel_lb_addr prealloc_loc = {};
 	int prealloc_len = 0;
 	struct udf_inode_info *iinfo;
 
@@ -411,11 +411,11 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
 	}
 
 	if (fake) {
-		udf_add_aext(inode, last_pos, last_ext->extLocation,
+		udf_add_aext(inode, last_pos, &last_ext->extLocation,
 			     last_ext->extLength, 1);
 		count++;
 	} else
-		udf_write_aext(inode, last_pos, last_ext->extLocation,
+		udf_write_aext(inode, last_pos, &last_ext->extLocation,
 				last_ext->extLength, 1);
 
 	/* Managed to do everything necessary? */
@@ -432,7 +432,7 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
 	/* Create enough extents to cover the whole hole */
 	while (blocks > add) {
 		blocks -= add;
-		if (udf_add_aext(inode, last_pos, last_ext->extLocation,
+		if (udf_add_aext(inode, last_pos, &last_ext->extLocation,
 				 last_ext->extLength, 1) == -1)
 			return -1;
 		count++;
@@ -440,7 +440,7 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
 	if (blocks) {
 		last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
 			(blocks << sb->s_blocksize_bits);
-		if (udf_add_aext(inode, last_pos, last_ext->extLocation,
+		if (udf_add_aext(inode, last_pos, &last_ext->extLocation,
 				 last_ext->extLength, 1) == -1)
 			return -1;
 		count++;
@@ -449,7 +449,7 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
 out:
 	/* Do we have some preallocated blocks saved? */
 	if (prealloc_len) {
-		if (udf_add_aext(inode, last_pos, prealloc_loc,
+		if (udf_add_aext(inode, last_pos, &prealloc_loc,
 				 prealloc_len, 1) == -1)
 			return -1;
 		last_ext->extLocation = prealloc_loc;
@@ -459,9 +459,9 @@ out:
 
 	/* last_pos should point to the last written extent... */
 	if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
-		last_pos->offset -= sizeof(short_ad);
+		last_pos->offset -= sizeof(struct short_ad);
 	else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
-		last_pos->offset -= sizeof(long_ad);
+		last_pos->offset -= sizeof(struct long_ad);
 	else
 		return -1;
 
@@ -473,11 +473,11 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
 {
 	static sector_t last_block;
 	struct buffer_head *result = NULL;
-	kernel_long_ad laarr[EXTENT_MERGE_SIZE];
+	struct kernel_long_ad laarr[EXTENT_MERGE_SIZE];
 	struct extent_position prev_epos, cur_epos, next_epos;
 	int count = 0, startnum = 0, endnum = 0;
 	uint32_t elen = 0, tmpelen;
-	kernel_lb_addr eloc, tmpeloc;
+	struct kernel_lb_addr eloc, tmpeloc;
 	int c = 1;
 	loff_t lbcount = 0, b_off = 0;
 	uint32_t newblocknum, newblock;
@@ -550,12 +550,12 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
 			elen = EXT_RECORDED_ALLOCATED |
 				((elen + inode->i_sb->s_blocksize - 1) &
 				 ~(inode->i_sb->s_blocksize - 1));
-			etype = udf_write_aext(inode, &cur_epos, eloc, elen, 1);
+			etype = udf_write_aext(inode, &cur_epos, &eloc, elen, 1);
 		}
 		brelse(prev_epos.bh);
 		brelse(cur_epos.bh);
 		brelse(next_epos.bh);
-		newblock = udf_get_lb_pblock(inode->i_sb, eloc, offset);
+		newblock = udf_get_lb_pblock(inode->i_sb, &eloc, offset);
 		*phys = newblock;
 		return NULL;
 	}
@@ -572,7 +572,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
 		} else {
 			/* Create a fake extent when there's not one */
 			memset(&laarr[0].extLocation, 0x00,
-				sizeof(kernel_lb_addr));
+				sizeof(struct kernel_lb_addr));
 			laarr[0].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED;
 			/* Will udf_extend_file() create real extent from
 			   a fake one? */
@@ -602,7 +602,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
 			laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
 				inode->i_sb->s_blocksize;
 			memset(&laarr[c].extLocation, 0x00,
-				sizeof(kernel_lb_addr));
+				sizeof(struct kernel_lb_addr));
 			count++;
 			endnum++;
 		}
@@ -699,7 +699,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
 
 static void udf_split_extents(struct inode *inode, int *c, int offset,
 			      int newblocknum,
-			      kernel_long_ad laarr[EXTENT_MERGE_SIZE],
+			      struct kernel_long_ad laarr[EXTENT_MERGE_SIZE],
 			      int *endnum)
 {
 	unsigned long blocksize = inode->i_sb->s_blocksize;
@@ -726,7 +726,7 @@ static void udf_split_extents(struct inode *inode, int *c, int offset,
 		if (offset) {
 			if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) {
 				udf_free_blocks(inode->i_sb, inode,
-						laarr[curr].extLocation,
+						&laarr[curr].extLocation,
 						0, offset);
 				laarr[curr].extLength =
 					EXT_NOT_RECORDED_NOT_ALLOCATED |
@@ -763,7 +763,7 @@ static void udf_split_extents(struct inode *inode, int *c, int offset,
 }
 
 static void udf_prealloc_extents(struct inode *inode, int c, int lastblock,
-				 kernel_long_ad laarr[EXTENT_MERGE_SIZE],
+				 struct kernel_long_ad laarr[EXTENT_MERGE_SIZE],
 				 int *endnum)
 {
 	int start, length = 0, currlength = 0, i;
@@ -817,7 +817,7 @@ static void udf_prealloc_extents(struct inode *inode, int c, int lastblock,
 					 inode->i_sb->s_blocksize_bits);
 			else {
 				memmove(&laarr[c + 2], &laarr[c + 1],
-					sizeof(long_ad) * (*endnum - (c + 1)));
+					sizeof(struct long_ad) * (*endnum - (c + 1)));
 				(*endnum)++;
 				laarr[c + 1].extLocation.logicalBlockNum = next;
 				laarr[c + 1].extLocation.partitionReferenceNum =
@@ -846,7 +846,7 @@ static void udf_prealloc_extents(struct inode *inode, int c, int lastblock,
 					if (*endnum > (i + 1))
 						memmove(&laarr[i],
 							&laarr[i + 1],
-							sizeof(long_ad) *
+							sizeof(struct long_ad) *
 							(*endnum - (i + 1)));
 					i--;
 					(*endnum)--;
@@ -859,7 +859,7 @@ static void udf_prealloc_extents(struct inode *inode, int c, int lastblock,
 }
 
 static void udf_merge_extents(struct inode *inode,
-			      kernel_long_ad laarr[EXTENT_MERGE_SIZE],
+			      struct kernel_long_ad laarr[EXTENT_MERGE_SIZE],
 			      int *endnum)
 {
 	int i;
@@ -867,8 +867,8 @@ static void udf_merge_extents(struct inode *inode,
 	unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
 
 	for (i = 0; i < (*endnum - 1); i++) {
-		kernel_long_ad *li /*l[i]*/ = &laarr[i];
-		kernel_long_ad *lip1 /*l[i plus 1]*/ = &laarr[i + 1];
+		struct kernel_long_ad *li /*l[i]*/ = &laarr[i];
+		struct kernel_long_ad *lip1 /*l[i plus 1]*/ = &laarr[i + 1];
 
 		if (((li->extLength >> 30) == (lip1->extLength >> 30)) &&
 			(((li->extLength >> 30) ==
@@ -902,7 +902,7 @@ static void udf_merge_extents(struct inode *inode,
 					 blocksize - 1) & ~(blocksize - 1));
 				if (*endnum > (i + 2))
 					memmove(&laarr[i + 1], &laarr[i + 2],
-						sizeof(long_ad) *
+						sizeof(struct long_ad) *
 						(*endnum - (i + 2)));
 				i--;
 				(*endnum)--;
@@ -911,7 +911,7 @@ static void udf_merge_extents(struct inode *inode,
 				(EXT_NOT_RECORDED_ALLOCATED >> 30)) &&
 			   ((lip1->extLength >> 30) ==
 				(EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))) {
-			udf_free_blocks(inode->i_sb, inode, li->extLocation, 0,
+			udf_free_blocks(inode->i_sb, inode, &li->extLocation, 0,
 					((li->extLength &
 					  UDF_EXTENT_LENGTH_MASK) +
 					 blocksize - 1) >> blocksize_bits);
@@ -937,7 +937,7 @@ static void udf_merge_extents(struct inode *inode,
 					  blocksize - 1) & ~(blocksize - 1));
 				if (*endnum > (i + 2))
 					memmove(&laarr[i + 1], &laarr[i + 2],
-						sizeof(long_ad) *
+						sizeof(struct long_ad) *
 						(*endnum - (i + 2)));
 				i--;
 				(*endnum)--;
@@ -945,7 +945,7 @@ static void udf_merge_extents(struct inode *inode,
 		} else if ((li->extLength >> 30) ==
 					(EXT_NOT_RECORDED_ALLOCATED >> 30)) {
 			udf_free_blocks(inode->i_sb, inode,
-					li->extLocation, 0,
+					&li->extLocation, 0,
 					((li->extLength &
 						UDF_EXTENT_LENGTH_MASK) +
 					 blocksize - 1) >> blocksize_bits);
@@ -959,12 +959,12 @@ static void udf_merge_extents(struct inode *inode,
 }
 
 static void udf_update_extents(struct inode *inode,
-			       kernel_long_ad laarr[EXTENT_MERGE_SIZE],
+			       struct kernel_long_ad laarr[EXTENT_MERGE_SIZE],
 			       int startnum, int endnum,
 			       struct extent_position *epos)
 {
 	int start = 0, i;
-	kernel_lb_addr tmploc;
+	struct kernel_lb_addr tmploc;
 	uint32_t tmplen;
 
 	if (startnum > endnum) {
@@ -983,7 +983,7 @@ static void udf_update_extents(struct inode *inode,
 
 	for (i = start; i < endnum; i++) {
 		udf_next_aext(inode, epos, &tmploc, &tmplen, 0);
-		udf_write_aext(inode, epos, laarr[i].extLocation,
+		udf_write_aext(inode, epos, &laarr[i].extLocation,
 			       laarr[i].extLength, 1);
 	}
 }
@@ -1076,7 +1076,7 @@ static void __udf_read_inode(struct inode *inode)
 	 *      i_nlink = 1
 	 *      i_op = NULL;
 	 */
-	bh = udf_read_ptagged(inode->i_sb, iinfo->i_location, 0, &ident);
+	bh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 0, &ident);
 	if (!bh) {
 		printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed !bh\n",
 		       inode->i_ino);
@@ -1098,24 +1098,24 @@ static void __udf_read_inode(struct inode *inode)
 	if (fe->icbTag.strategyType == cpu_to_le16(4096)) {
 		struct buffer_head *ibh;
 
-		ibh = udf_read_ptagged(inode->i_sb, iinfo->i_location, 1,
+		ibh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 1,
 					&ident);
 		if (ident == TAG_IDENT_IE && ibh) {
 			struct buffer_head *nbh = NULL;
-			kernel_lb_addr loc;
+			struct kernel_lb_addr loc;
 			struct indirectEntry *ie;
 
 			ie = (struct indirectEntry *)ibh->b_data;
 			loc = lelb_to_cpu(ie->indirectICB.extLocation);
 
 			if (ie->indirectICB.extLength &&
-				(nbh = udf_read_ptagged(inode->i_sb, loc, 0,
+				(nbh = udf_read_ptagged(inode->i_sb, &loc, 0,
 							&ident))) {
 				if (ident == TAG_IDENT_FE ||
 					ident == TAG_IDENT_EFE) {
 					memcpy(&iinfo->i_location,
 						&loc,
-						sizeof(kernel_lb_addr));
+						sizeof(struct kernel_lb_addr));
 					brelse(bh);
 					brelse(ibh);
 					brelse(nbh);
@@ -1222,8 +1222,15 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 	inode->i_size = le64_to_cpu(fe->informationLength);
 	iinfo->i_lenExtents = inode->i_size;
 
-	inode->i_mode = udf_convert_permissions(fe);
-	inode->i_mode &= ~UDF_SB(inode->i_sb)->s_umask;
+	if (fe->icbTag.fileType != ICBTAG_FILE_TYPE_DIRECTORY &&
+			sbi->s_fmode != UDF_INVALID_MODE)
+		inode->i_mode = sbi->s_fmode;
+	else if (fe->icbTag.fileType == ICBTAG_FILE_TYPE_DIRECTORY &&
+			sbi->s_dmode != UDF_INVALID_MODE)
+		inode->i_mode = sbi->s_dmode;
+	else
+		inode->i_mode = udf_convert_permissions(fe);
+	inode->i_mode &= ~sbi->s_umask;
 
 	if (iinfo->i_efe == 0) {
 		inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) <<
@@ -1396,7 +1403,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
 
 	bh = udf_tread(inode->i_sb,
 			udf_get_lb_pblock(inode->i_sb,
-					  iinfo->i_location, 0));
+					  &iinfo->i_location, 0));
 	if (!bh) {
 		udf_debug("bread failure\n");
 		return -EIO;
@@ -1416,13 +1423,13 @@ static int udf_update_inode(struct inode *inode, int do_sync)
 		       iinfo->i_ext.i_data, inode->i_sb->s_blocksize -
 					sizeof(struct unallocSpaceEntry));
 		crclen = sizeof(struct unallocSpaceEntry) +
-				iinfo->i_lenAlloc - sizeof(tag);
+				iinfo->i_lenAlloc - sizeof(struct tag);
 		use->descTag.tagLocation = cpu_to_le32(
 						iinfo->i_location.
 							logicalBlockNum);
 		use->descTag.descCRCLength = cpu_to_le16(crclen);
 		use->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)use +
-							   sizeof(tag),
+							   sizeof(struct tag),
 							   crclen));
 		use->descTag.tagChecksum = udf_tag_checksum(&use->descTag);
 
@@ -1459,23 +1466,23 @@ static int udf_update_inode(struct inode *inode, int do_sync)
 	fe->informationLength = cpu_to_le64(inode->i_size);
 
 	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
-		regid *eid;
+		struct regid *eid;
 		struct deviceSpec *dsea =
 			(struct deviceSpec *)udf_get_extendedattr(inode, 12, 1);
 		if (!dsea) {
 			dsea = (struct deviceSpec *)
 				udf_add_extendedattr(inode,
 						     sizeof(struct deviceSpec) +
-						     sizeof(regid), 12, 0x3);
+						     sizeof(struct regid), 12, 0x3);
 			dsea->attrType = cpu_to_le32(12);
 			dsea->attrSubtype = 1;
 			dsea->attrLength = cpu_to_le32(
 						sizeof(struct deviceSpec) +
-						sizeof(regid));
-			dsea->impUseLength = cpu_to_le32(sizeof(regid));
+						sizeof(struct regid));
+			dsea->impUseLength = cpu_to_le32(sizeof(struct regid));
 		}
-		eid = (regid *)dsea->impUse;
-		memset(eid, 0, sizeof(regid));
+		eid = (struct regid *)dsea->impUse;
+		memset(eid, 0, sizeof(struct regid));
 		strcpy(eid->ident, UDF_ID_DEVELOPER);
 		eid->identSuffix[0] = UDF_OS_CLASS_UNIX;
 		eid->identSuffix[1] = UDF_OS_ID_LINUX;
@@ -1494,7 +1501,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
 		udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime);
 		udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime);
 		udf_time_to_disk_stamp(&fe->attrTime, inode->i_ctime);
-		memset(&(fe->impIdent), 0, sizeof(regid));
+		memset(&(fe->impIdent), 0, sizeof(struct regid));
 		strcpy(fe->impIdent.ident, UDF_ID_DEVELOPER);
 		fe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
 		fe->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
@@ -1533,7 +1540,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
 		udf_time_to_disk_stamp(&efe->createTime, iinfo->i_crtime);
 		udf_time_to_disk_stamp(&efe->attrTime, inode->i_ctime);
 
-		memset(&(efe->impIdent), 0, sizeof(regid));
+		memset(&(efe->impIdent), 0, sizeof(struct regid));
 		strcpy(efe->impIdent.ident, UDF_ID_DEVELOPER);
 		efe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
 		efe->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
@@ -1584,9 +1591,9 @@ static int udf_update_inode(struct inode *inode, int do_sync)
 	fe->descTag.tagLocation = cpu_to_le32(
 					iinfo->i_location.logicalBlockNum);
 	crclen += iinfo->i_lenEAttr + iinfo->i_lenAlloc -
-								sizeof(tag);
+								sizeof(struct tag);
 	fe->descTag.descCRCLength = cpu_to_le16(crclen);
-	fe->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)fe + sizeof(tag),
+	fe->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)fe + sizeof(struct tag),
 						  crclen));
 	fe->descTag.tagChecksum = udf_tag_checksum(&fe->descTag);
 
@@ -1606,7 +1613,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
 	return err;
 }
 
-struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino)
+struct inode *udf_iget(struct super_block *sb, struct kernel_lb_addr *ino)
 {
 	unsigned long block = udf_get_lb_pblock(sb, ino, 0);
 	struct inode *inode = iget_locked(sb, block);
@@ -1615,7 +1622,7 @@ struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino)
 		return NULL;
 
 	if (inode->i_state & I_NEW) {
-		memcpy(&UDF_I(inode)->i_location, &ino, sizeof(kernel_lb_addr));
+		memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr));
 		__udf_read_inode(inode);
 		unlock_new_inode(inode);
 	}
@@ -1623,10 +1630,10 @@ struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino)
 	if (is_bad_inode(inode))
 		goto out_iput;
 
-	if (ino.logicalBlockNum >= UDF_SB(sb)->
-			s_partmaps[ino.partitionReferenceNum].s_partition_len) {
+	if (ino->logicalBlockNum >= UDF_SB(sb)->
+			s_partmaps[ino->partitionReferenceNum].s_partition_len) {
 		udf_debug("block=%d, partition=%d out of range\n",
-			  ino.logicalBlockNum, ino.partitionReferenceNum);
+			  ino->logicalBlockNum, ino->partitionReferenceNum);
 		make_bad_inode(inode);
 		goto out_iput;
 	}
@@ -1639,11 +1646,11 @@ struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino)
 }
 
 int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
-		    kernel_lb_addr eloc, uint32_t elen, int inc)
+		    struct kernel_lb_addr *eloc, uint32_t elen, int inc)
 {
 	int adsize;
-	short_ad *sad = NULL;
-	long_ad *lad = NULL;
+	struct short_ad *sad = NULL;
+	struct long_ad *lad = NULL;
 	struct allocExtDesc *aed;
 	int8_t etype;
 	uint8_t *ptr;
@@ -1657,9 +1664,9 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
 		ptr = epos->bh->b_data + epos->offset;
 
 	if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
-		adsize = sizeof(short_ad);
+		adsize = sizeof(struct short_ad);
 	else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
-		adsize = sizeof(long_ad);
+		adsize = sizeof(struct long_ad);
 	else
 		return -1;
 
@@ -1667,7 +1674,7 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
 		char *sptr, *dptr;
 		struct buffer_head *nbh;
 		int err, loffset;
-		kernel_lb_addr obloc = epos->block;
+		struct kernel_lb_addr obloc = epos->block;
 
 		epos->block.logicalBlockNum = udf_new_block(inode->i_sb, NULL,
 						obloc.partitionReferenceNum,
@@ -1675,7 +1682,7 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
 		if (!epos->block.logicalBlockNum)
 			return -1;
 		nbh = udf_tgetblk(inode->i_sb, udf_get_lb_pblock(inode->i_sb,
-								 epos->block,
+								 &epos->block,
 								 0));
 		if (!nbh)
 			return -1;
@@ -1712,20 +1719,20 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
 		}
 		if (UDF_SB(inode->i_sb)->s_udfrev >= 0x0200)
 			udf_new_tag(nbh->b_data, TAG_IDENT_AED, 3, 1,
-				    epos->block.logicalBlockNum, sizeof(tag));
+				    epos->block.logicalBlockNum, sizeof(struct tag));
 		else
 			udf_new_tag(nbh->b_data, TAG_IDENT_AED, 2, 1,
-				    epos->block.logicalBlockNum, sizeof(tag));
+				    epos->block.logicalBlockNum, sizeof(struct tag));
 		switch (iinfo->i_alloc_type) {
 		case ICBTAG_FLAG_AD_SHORT:
-			sad = (short_ad *)sptr;
+			sad = (struct short_ad *)sptr;
 			sad->extLength = cpu_to_le32(EXT_NEXT_EXTENT_ALLOCDECS |
 						     inode->i_sb->s_blocksize);
 			sad->extPosition =
 				cpu_to_le32(epos->block.logicalBlockNum);
 			break;
 		case ICBTAG_FLAG_AD_LONG:
-			lad = (long_ad *)sptr;
+			lad = (struct long_ad *)sptr;
 			lad->extLength = cpu_to_le32(EXT_NEXT_EXTENT_ALLOCDECS |
 						     inode->i_sb->s_blocksize);
 			lad->extLocation = cpu_to_lelb(epos->block);
@@ -1769,12 +1776,12 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
 }
 
 int8_t udf_write_aext(struct inode *inode, struct extent_position *epos,
-		      kernel_lb_addr eloc, uint32_t elen, int inc)
+		      struct kernel_lb_addr *eloc, uint32_t elen, int inc)
 {
 	int adsize;
 	uint8_t *ptr;
-	short_ad *sad;
-	long_ad *lad;
+	struct short_ad *sad;
+	struct long_ad *lad;
 	struct udf_inode_info *iinfo = UDF_I(inode);
 
 	if (!epos->bh)
@@ -1786,17 +1793,17 @@ int8_t udf_write_aext(struct inode *inode, struct extent_position *epos,
 
 	switch (iinfo->i_alloc_type) {
 	case ICBTAG_FLAG_AD_SHORT:
-		sad = (short_ad *)ptr;
+		sad = (struct short_ad *)ptr;
 		sad->extLength = cpu_to_le32(elen);
-		sad->extPosition = cpu_to_le32(eloc.logicalBlockNum);
-		adsize = sizeof(short_ad);
+		sad->extPosition = cpu_to_le32(eloc->logicalBlockNum);
+		adsize = sizeof(struct short_ad);
 		break;
 	case ICBTAG_FLAG_AD_LONG:
-		lad = (long_ad *)ptr;
+		lad = (struct long_ad *)ptr;
 		lad->extLength = cpu_to_le32(elen);
-		lad->extLocation = cpu_to_lelb(eloc);
+		lad->extLocation = cpu_to_lelb(*eloc);
 		memset(lad->impUse, 0x00, sizeof(lad->impUse));
-		adsize = sizeof(long_ad);
+		adsize = sizeof(struct long_ad);
 		break;
 	default:
 		return -1;
@@ -1823,7 +1830,7 @@ int8_t udf_write_aext(struct inode *inode, struct extent_position *epos,
 }
 
 int8_t udf_next_aext(struct inode *inode, struct extent_position *epos,
-		     kernel_lb_addr *eloc, uint32_t *elen, int inc)
+		     struct kernel_lb_addr *eloc, uint32_t *elen, int inc)
 {
 	int8_t etype;
 
@@ -1833,7 +1840,7 @@ int8_t udf_next_aext(struct inode *inode, struct extent_position *epos,
 		epos->block = *eloc;
 		epos->offset = sizeof(struct allocExtDesc);
 		brelse(epos->bh);
-		block = udf_get_lb_pblock(inode->i_sb, epos->block, 0);
+		block = udf_get_lb_pblock(inode->i_sb, &epos->block, 0);
 		epos->bh = udf_tread(inode->i_sb, block);
 		if (!epos->bh) {
 			udf_debug("reading block %d failed!\n", block);
@@ -1845,13 +1852,13 @@ int8_t udf_next_aext(struct inode *inode, struct extent_position *epos,
 }
 
 int8_t udf_current_aext(struct inode *inode, struct extent_position *epos,
-			kernel_lb_addr *eloc, uint32_t *elen, int inc)
+			struct kernel_lb_addr *eloc, uint32_t *elen, int inc)
 {
 	int alen;
 	int8_t etype;
 	uint8_t *ptr;
-	short_ad *sad;
-	long_ad *lad;
+	struct short_ad *sad;
+	struct long_ad *lad;
 	struct udf_inode_info *iinfo = UDF_I(inode);
 
 	if (!epos->bh) {
@@ -1900,9 +1907,9 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos,
 }
 
 static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos,
-			      kernel_lb_addr neloc, uint32_t nelen)
+			      struct kernel_lb_addr neloc, uint32_t nelen)
 {
-	kernel_lb_addr oeloc;
+	struct kernel_lb_addr oeloc;
 	uint32_t oelen;
 	int8_t etype;
 
@@ -1910,18 +1917,18 @@ static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos,
 		get_bh(epos.bh);
 
 	while ((etype = udf_next_aext(inode, &epos, &oeloc, &oelen, 0)) != -1) {
-		udf_write_aext(inode, &epos, neloc, nelen, 1);
+		udf_write_aext(inode, &epos, &neloc, nelen, 1);
 		neloc = oeloc;
 		nelen = (etype << 30) | oelen;
 	}
-	udf_add_aext(inode, &epos, neloc, nelen, 1);
+	udf_add_aext(inode, &epos, &neloc, nelen, 1);
 	brelse(epos.bh);
 
 	return (nelen >> 30);
 }
 
 int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
-		       kernel_lb_addr eloc, uint32_t elen)
+		       struct kernel_lb_addr eloc, uint32_t elen)
 {
 	struct extent_position oepos;
 	int adsize;
@@ -1936,9 +1943,9 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
 
 	iinfo = UDF_I(inode);
 	if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
-		adsize = sizeof(short_ad);
+		adsize = sizeof(struct short_ad);
 	else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
-		adsize = sizeof(long_ad);
+		adsize = sizeof(struct long_ad);
 	else
 		adsize = 0;
 
@@ -1947,7 +1954,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
 		return -1;
 
 	while ((etype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) {
-		udf_write_aext(inode, &oepos, eloc, (etype << 30) | elen, 1);
+		udf_write_aext(inode, &oepos, &eloc, (etype << 30) | elen, 1);
 		if (oepos.bh != epos.bh) {
 			oepos.block = epos.block;
 			brelse(oepos.bh);
@@ -1956,13 +1963,13 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
 			oepos.offset = epos.offset - adsize;
 		}
 	}
-	memset(&eloc, 0x00, sizeof(kernel_lb_addr));
+	memset(&eloc, 0x00, sizeof(struct kernel_lb_addr));
 	elen = 0;
 
 	if (epos.bh != oepos.bh) {
-		udf_free_blocks(inode->i_sb, inode, epos.block, 0, 1);
-		udf_write_aext(inode, &oepos, eloc, elen, 1);
-		udf_write_aext(inode, &oepos, eloc, elen, 1);
+		udf_free_blocks(inode->i_sb, inode, &epos.block, 0, 1);
+		udf_write_aext(inode, &oepos, &eloc, elen, 1);
+		udf_write_aext(inode, &oepos, &eloc, elen, 1);
 		if (!oepos.bh) {
 			iinfo->i_lenAlloc -= (adsize * 2);
 			mark_inode_dirty(inode);
@@ -1979,7 +1986,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
 			mark_buffer_dirty_inode(oepos.bh, inode);
 		}
 	} else {
-		udf_write_aext(inode, &oepos, eloc, elen, 1);
+		udf_write_aext(inode, &oepos, &eloc, elen, 1);
 		if (!oepos.bh) {
 			iinfo->i_lenAlloc -= adsize;
 			mark_inode_dirty(inode);
@@ -2004,7 +2011,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
 }
 
 int8_t inode_bmap(struct inode *inode, sector_t block,
-		  struct extent_position *pos, kernel_lb_addr *eloc,
+		  struct extent_position *pos, struct kernel_lb_addr *eloc,
 		  uint32_t *elen, sector_t *offset)
 {
 	unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
@@ -2036,7 +2043,7 @@ int8_t inode_bmap(struct inode *inode, sector_t block,
 
 long udf_block_map(struct inode *inode, sector_t block)
 {
-	kernel_lb_addr eloc;
+	struct kernel_lb_addr eloc;
 	uint32_t elen;
 	sector_t offset;
 	struct extent_position epos = {};
@@ -2046,7 +2053,7 @@ long udf_block_map(struct inode *inode, sector_t block)
 
 	if (inode_bmap(inode, block, &epos, &eloc, &elen, &offset) ==
 						(EXT_RECORDED_ALLOCATED >> 30))
-		ret = udf_get_lb_pblock(inode->i_sb, eloc, offset);
+		ret = udf_get_lb_pblock(inode->i_sb, &eloc, offset);
 	else
 		ret = 0;
 
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index 84bf0fd..9215700 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -134,10 +134,10 @@ struct genericFormat *udf_add_extendedattr(struct inode *inode, uint32_t size,
 			}
 		}
 		/* rewrite CRC + checksum of eahd */
-		crclen = sizeof(struct extendedAttrHeaderDesc) - sizeof(tag);
+		crclen = sizeof(struct extendedAttrHeaderDesc) - sizeof(struct tag);
 		eahd->descTag.descCRCLength = cpu_to_le16(crclen);
 		eahd->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)eahd +
-						sizeof(tag), crclen));
+						sizeof(struct tag), crclen));
 		eahd->descTag.tagChecksum = udf_tag_checksum(&eahd->descTag);
 		iinfo->i_lenEAttr += size;
 		return (struct genericFormat *)&ea[offset];
@@ -202,7 +202,7 @@ struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type,
 struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
 				    uint32_t location, uint16_t *ident)
 {
-	tag *tag_p;
+	struct tag *tag_p;
 	struct buffer_head *bh = NULL;
 
 	/* Read the block */
@@ -216,7 +216,7 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
 		return NULL;
 	}
 
-	tag_p = (tag *)(bh->b_data);
+	tag_p = (struct tag *)(bh->b_data);
 
 	*ident = le16_to_cpu(tag_p->tagIdent);
 
@@ -241,9 +241,9 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
 	}
 
 	/* Verify the descriptor CRC */
-	if (le16_to_cpu(tag_p->descCRCLength) + sizeof(tag) > sb->s_blocksize ||
+	if (le16_to_cpu(tag_p->descCRCLength) + sizeof(struct tag) > sb->s_blocksize ||
 	    le16_to_cpu(tag_p->descCRC) == crc_itu_t(0,
-					bh->b_data + sizeof(tag),
+					bh->b_data + sizeof(struct tag),
 					le16_to_cpu(tag_p->descCRCLength)))
 		return bh;
 
@@ -255,27 +255,28 @@ error_out:
 	return NULL;
 }
 
-struct buffer_head *udf_read_ptagged(struct super_block *sb, kernel_lb_addr loc,
+struct buffer_head *udf_read_ptagged(struct super_block *sb,
+				     struct kernel_lb_addr *loc,
 				     uint32_t offset, uint16_t *ident)
 {
 	return udf_read_tagged(sb, udf_get_lb_pblock(sb, loc, offset),
-			       loc.logicalBlockNum + offset, ident);
+			       loc->logicalBlockNum + offset, ident);
 }
 
 void udf_update_tag(char *data, int length)
 {
-	tag *tptr = (tag *)data;
-	length -= sizeof(tag);
+	struct tag *tptr = (struct tag *)data;
+	length -= sizeof(struct tag);
 
 	tptr->descCRCLength = cpu_to_le16(length);
-	tptr->descCRC = cpu_to_le16(crc_itu_t(0, data + sizeof(tag), length));
+	tptr->descCRC = cpu_to_le16(crc_itu_t(0, data + sizeof(struct tag), length));
 	tptr->tagChecksum = udf_tag_checksum(tptr);
 }
 
 void udf_new_tag(char *data, uint16_t ident, uint16_t version, uint16_t snum,
 		 uint32_t loc, int length)
 {
-	tag *tptr = (tag *)data;
+	struct tag *tptr = (struct tag *)data;
 	tptr->tagIdent = cpu_to_le16(ident);
 	tptr->descVersion = cpu_to_le16(version);
 	tptr->tagSerialNum = cpu_to_le16(snum);
@@ -283,12 +284,12 @@ void udf_new_tag(char *data, uint16_t ident, uint16_t version, uint16_t snum,
 	udf_update_tag(data, length);
 }
 
-u8 udf_tag_checksum(const tag *t)
+u8 udf_tag_checksum(const struct tag *t)
 {
 	u8 *data = (u8 *)t;
 	u8 checksum = 0;
 	int i;
-	for (i = 0; i < sizeof(tag); ++i)
+	for (i = 0; i < sizeof(struct tag); ++i)
 		if (i != 4) /* position of checksum */
 			checksum += data[i];
 	return checksum;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index f84bfaa..6a29fa3 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -47,7 +47,7 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
 		 struct fileIdentDesc *sfi, struct udf_fileident_bh *fibh,
 		 uint8_t *impuse, uint8_t *fileident)
 {
-	uint16_t crclen = fibh->eoffset - fibh->soffset - sizeof(tag);
+	uint16_t crclen = fibh->eoffset - fibh->soffset - sizeof(struct tag);
 	uint16_t crc;
 	int offset;
 	uint16_t liu = le16_to_cpu(cfi->lengthOfImpUse);
@@ -99,18 +99,18 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
 		memset(fibh->ebh->b_data, 0x00, padlen + offset);
 	}
 
-	crc = crc_itu_t(0, (uint8_t *)cfi + sizeof(tag),
-		      sizeof(struct fileIdentDesc) - sizeof(tag));
+	crc = crc_itu_t(0, (uint8_t *)cfi + sizeof(struct tag),
+		      sizeof(struct fileIdentDesc) - sizeof(struct tag));
 
 	if (fibh->sbh == fibh->ebh) {
 		crc = crc_itu_t(crc, (uint8_t *)sfi->impUse,
-			      crclen + sizeof(tag) -
+			      crclen + sizeof(struct tag) -
 			      sizeof(struct fileIdentDesc));
 	} else if (sizeof(struct fileIdentDesc) >= -fibh->soffset) {
 		crc = crc_itu_t(crc, fibh->ebh->b_data +
 					sizeof(struct fileIdentDesc) +
 					fibh->soffset,
-			      crclen + sizeof(tag) -
+			      crclen + sizeof(struct tag) -
 					sizeof(struct fileIdentDesc));
 	} else {
 		crc = crc_itu_t(crc, (uint8_t *)sfi->impUse,
@@ -154,7 +154,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
 	uint8_t lfi;
 	uint16_t liu;
 	loff_t size;
-	kernel_lb_addr eloc;
+	struct kernel_lb_addr eloc;
 	uint32_t elen;
 	sector_t offset;
 	struct extent_position epos = {};
@@ -171,12 +171,12 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
 		if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos,
 		    &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30))
 			goto out_err;
-		block = udf_get_lb_pblock(dir->i_sb, eloc, offset);
+		block = udf_get_lb_pblock(dir->i_sb, &eloc, offset);
 		if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
 			if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
-				epos.offset -= sizeof(short_ad);
+				epos.offset -= sizeof(struct short_ad);
 			else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
-				epos.offset -= sizeof(long_ad);
+				epos.offset -= sizeof(struct long_ad);
 		} else
 			offset = 0;
 
@@ -268,7 +268,7 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
 #ifdef UDF_RECOVERY
 	/* temporary shorthand for specifying files by inode number */
 	if (!strncmp(dentry->d_name.name, ".B=", 3)) {
-		kernel_lb_addr lb = {
+		struct kernel_lb_addr lb = {
 			.logicalBlockNum = 0,
 			.partitionReferenceNum =
 				simple_strtoul(dentry->d_name.name + 3,
@@ -283,11 +283,14 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
 #endif /* UDF_RECOVERY */
 
 	if (udf_find_entry(dir, &dentry->d_name, &fibh, &cfi)) {
+		struct kernel_lb_addr loc;
+
 		if (fibh.sbh != fibh.ebh)
 			brelse(fibh.ebh);
 		brelse(fibh.sbh);
 
-		inode = udf_iget(dir->i_sb, lelb_to_cpu(cfi.icb.extLocation));
+		loc = lelb_to_cpu(cfi.icb.extLocation);
+		inode = udf_iget(dir->i_sb, &loc);
 		if (!inode) {
 			unlock_kernel();
 			return ERR_PTR(-EACCES);
@@ -313,7 +316,7 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
 	uint8_t lfi;
 	uint16_t liu;
 	int block;
-	kernel_lb_addr eloc;
+	struct kernel_lb_addr eloc;
 	uint32_t elen = 0;
 	sector_t offset;
 	struct extent_position epos = {};
@@ -351,16 +354,16 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
 		if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos,
 		    &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30)) {
 			block = udf_get_lb_pblock(dir->i_sb,
-					dinfo->i_location, 0);
+					&dinfo->i_location, 0);
 			fibh->soffset = fibh->eoffset = sb->s_blocksize;
 			goto add;
 		}
-		block = udf_get_lb_pblock(dir->i_sb, eloc, offset);
+		block = udf_get_lb_pblock(dir->i_sb, &eloc, offset);
 		if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
 			if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
-				epos.offset -= sizeof(short_ad);
+				epos.offset -= sizeof(struct short_ad);
 			else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
-				epos.offset -= sizeof(long_ad);
+				epos.offset -= sizeof(struct long_ad);
 		} else
 			offset = 0;
 
@@ -409,10 +412,10 @@ add:
 	if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && elen) {
 		elen = (elen + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1);
 		if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
-			epos.offset -= sizeof(short_ad);
+			epos.offset -= sizeof(struct short_ad);
 		else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
-			epos.offset -= sizeof(long_ad);
-		udf_write_aext(dir, &epos, eloc, elen, 1);
+			epos.offset -= sizeof(struct long_ad);
+		udf_write_aext(dir, &epos, &eloc, elen, 1);
 	}
 	f_pos += nfidlen;
 
@@ -494,10 +497,10 @@ add:
 	memset(cfi, 0, sizeof(struct fileIdentDesc));
 	if (UDF_SB(sb)->s_udfrev >= 0x0200)
 		udf_new_tag((char *)cfi, TAG_IDENT_FID, 3, 1, block,
-			    sizeof(tag));
+			    sizeof(struct tag));
 	else
 		udf_new_tag((char *)cfi, TAG_IDENT_FID, 2, 1, block,
-			    sizeof(tag));
+			    sizeof(struct tag));
 	cfi->fileVersionNum = cpu_to_le16(1);
 	cfi->lengthFileIdent = namelen;
 	cfi->lengthOfImpUse = cpu_to_le16(0);
@@ -530,7 +533,7 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi,
 	cfi->fileCharacteristics |= FID_FILE_CHAR_DELETED;
 
 	if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT))
-		memset(&(cfi->icb), 0x00, sizeof(long_ad));
+		memset(&(cfi->icb), 0x00, sizeof(struct long_ad));
 
 	return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL);
 }
@@ -710,7 +713,7 @@ static int empty_dir(struct inode *dir)
 	loff_t f_pos;
 	loff_t size = udf_ext0_offset(dir) + dir->i_size;
 	int block;
-	kernel_lb_addr eloc;
+	struct kernel_lb_addr eloc;
 	uint32_t elen;
 	sector_t offset;
 	struct extent_position epos = {};
@@ -724,12 +727,12 @@ static int empty_dir(struct inode *dir)
 	else if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits,
 			      &epos, &eloc, &elen, &offset) ==
 					(EXT_RECORDED_ALLOCATED >> 30)) {
-		block = udf_get_lb_pblock(dir->i_sb, eloc, offset);
+		block = udf_get_lb_pblock(dir->i_sb, &eloc, offset);
 		if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
 			if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
-				epos.offset -= sizeof(short_ad);
+				epos.offset -= sizeof(struct short_ad);
 			else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
-				epos.offset -= sizeof(long_ad);
+				epos.offset -= sizeof(struct long_ad);
 		} else
 			offset = 0;
 
@@ -778,7 +781,7 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
 	struct inode *inode = dentry->d_inode;
 	struct udf_fileident_bh fibh;
 	struct fileIdentDesc *fi, cfi;
-	kernel_lb_addr tloc;
+	struct kernel_lb_addr tloc;
 
 	retval = -ENOENT;
 	lock_kernel();
@@ -788,7 +791,7 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
 
 	retval = -EIO;
 	tloc = lelb_to_cpu(cfi.icb.extLocation);
-	if (udf_get_lb_pblock(dir->i_sb, tloc, 0) != inode->i_ino)
+	if (udf_get_lb_pblock(dir->i_sb, &tloc, 0) != inode->i_ino)
 		goto end_rmdir;
 	retval = -ENOTEMPTY;
 	if (!empty_dir(inode))
@@ -824,7 +827,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
 	struct udf_fileident_bh fibh;
 	struct fileIdentDesc *fi;
 	struct fileIdentDesc cfi;
-	kernel_lb_addr tloc;
+	struct kernel_lb_addr tloc;
 
 	retval = -ENOENT;
 	lock_kernel();
@@ -834,7 +837,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
 
 	retval = -EIO;
 	tloc = lelb_to_cpu(cfi.icb.extLocation);
-	if (udf_get_lb_pblock(dir->i_sb, tloc, 0) != inode->i_ino)
+	if (udf_get_lb_pblock(dir->i_sb, &tloc, 0) != inode->i_ino)
 		goto end_unlink;
 
 	if (!inode->i_nlink) {
@@ -897,7 +900,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
 	inode->i_op = &page_symlink_inode_operations;
 
 	if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
-		kernel_lb_addr eloc;
+		struct kernel_lb_addr eloc;
 		uint32_t bsize;
 
 		block = udf_new_block(inode->i_sb, inode,
@@ -913,7 +916,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
 				iinfo->i_location.partitionReferenceNum;
 		bsize = inode->i_sb->s_blocksize;
 		iinfo->i_lenExtents = bsize;
-		udf_add_aext(inode, &epos, eloc, bsize, 0);
+		udf_add_aext(inode, &epos, &eloc, bsize, 0);
 		brelse(epos.bh);
 
 		block = udf_get_pblock(inode->i_sb, block,
@@ -1108,7 +1111,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct fileIdentDesc ocfi, ncfi;
 	struct buffer_head *dir_bh = NULL;
 	int retval = -ENOENT;
-	kernel_lb_addr tloc;
+	struct kernel_lb_addr tloc;
 	struct udf_inode_info *old_iinfo = UDF_I(old_inode);
 
 	lock_kernel();
@@ -1119,7 +1122,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
 		brelse(ofibh.sbh);
 	}
 	tloc = lelb_to_cpu(ocfi.icb.extLocation);
-	if (!ofi || udf_get_lb_pblock(old_dir->i_sb, tloc, 0)
+	if (!ofi || udf_get_lb_pblock(old_dir->i_sb, &tloc, 0)
 	    != old_inode->i_ino)
 		goto end_rename;
 
@@ -1158,7 +1161,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
 		if (!dir_fi)
 			goto end_rename;
 		tloc = lelb_to_cpu(dir_fi->icb.extLocation);
-		if (udf_get_lb_pblock(old_inode->i_sb, tloc, 0) !=
+		if (udf_get_lb_pblock(old_inode->i_sb, &tloc, 0) !=
 				old_dir->i_ino)
 			goto end_rename;
 
@@ -1187,7 +1190,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 */
 	ncfi.fileVersionNum = ocfi.fileVersionNum;
 	ncfi.fileCharacteristics = ocfi.fileCharacteristics;
-	memcpy(&(ncfi.icb), &(ocfi.icb), sizeof(long_ad));
+	memcpy(&(ncfi.icb), &(ocfi.icb), sizeof(struct long_ad));
 	udf_write_fi(new_dir, &ncfi, nfi, &nfibh, NULL, NULL);
 
 	/* The old fid may have moved - find it again */
@@ -1242,6 +1245,7 @@ end_rename:
 
 static struct dentry *udf_get_parent(struct dentry *child)
 {
+	struct kernel_lb_addr tloc;
 	struct inode *inode = NULL;
 	struct qstr dotdot = {.name = "..", .len = 2};
 	struct fileIdentDesc cfi;
@@ -1255,8 +1259,8 @@ static struct dentry *udf_get_parent(struct dentry *child)
 		brelse(fibh.ebh);
 	brelse(fibh.sbh);
 
-	inode = udf_iget(child->d_inode->i_sb,
-			 lelb_to_cpu(cfi.icb.extLocation));
+	tloc = lelb_to_cpu(cfi.icb.extLocation);
+	inode = udf_iget(child->d_inode->i_sb, &tloc);
 	if (!inode)
 		goto out_unlock;
 	unlock_kernel();
@@ -1272,14 +1276,14 @@ static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block,
 					u16 partref, __u32 generation)
 {
 	struct inode *inode;
-	kernel_lb_addr loc;
+	struct kernel_lb_addr loc;
 
 	if (block == 0)
 		return ERR_PTR(-ESTALE);
 
 	loc.logicalBlockNum = block;
 	loc.partitionReferenceNum = partref;
-	inode = udf_iget(sb, loc);
+	inode = udf_iget(sb, &loc);
 
 	if (inode == NULL)
 		return ERR_PTR(-ENOMEM);
@@ -1318,7 +1322,7 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
 {
 	int len = *lenp;
 	struct inode *inode =  de->d_inode;
-	kernel_lb_addr location = UDF_I(inode)->i_location;
+	struct kernel_lb_addr location = UDF_I(inode)->i_location;
 	struct fid *fid = (struct fid *)fh;
 	int type = FILEID_UDF_WITHOUT_PARENT;
 
diff --git a/fs/udf/osta_udf.h b/fs/udf/osta_udf.h
index 65ff479..fbff746 100644
--- a/fs/udf/osta_udf.h
+++ b/fs/udf/osta_udf.h
@@ -85,7 +85,7 @@ struct appIdentSuffix {
 /* Logical Volume Integrity Descriptor (UDF 2.50 2.2.6) */
 /* Implementation Use (UDF 2.50 2.2.6.4) */
 struct logicalVolIntegrityDescImpUse {
-	regid		impIdent;
+	struct regid	impIdent;
 	__le32		numFiles;
 	__le32		numDirs;
 	__le16		minUDFReadRev;
@@ -97,12 +97,12 @@ struct logicalVolIntegrityDescImpUse {
 /* Implementation Use Volume Descriptor (UDF 2.50 2.2.7) */
 /* Implementation Use (UDF 2.50 2.2.7.2) */
 struct impUseVolDescImpUse {
-	charspec	LVICharset;
+	struct charspec	LVICharset;
 	dstring		logicalVolIdent[128];
 	dstring		LVInfo1[36];
 	dstring		LVInfo2[36];
 	dstring		LVInfo3[36];
-	regid		impIdent;
+	struct regid	impIdent;
 	uint8_t		impUse[128];
 } __attribute__ ((packed));
 
@@ -110,7 +110,7 @@ struct udfPartitionMap2 {
 	uint8_t		partitionMapType;
 	uint8_t		partitionMapLength;
 	uint8_t		reserved1[2];
-	regid		partIdent;
+	struct regid	partIdent;
 	__le16		volSeqNum;
 	__le16		partitionNum;
 } __attribute__ ((packed));
@@ -120,7 +120,7 @@ struct virtualPartitionMap {
 	uint8_t		partitionMapType;
 	uint8_t		partitionMapLength;
 	uint8_t		reserved1[2];
-	regid		partIdent;
+	struct regid	partIdent;
 	__le16		volSeqNum;
 	__le16		partitionNum;
 	uint8_t		reserved2[24];
@@ -131,7 +131,7 @@ struct sparablePartitionMap {
 	uint8_t partitionMapType;
 	uint8_t partitionMapLength;
 	uint8_t reserved1[2];
-	regid partIdent;
+	struct regid partIdent;
 	__le16 volSeqNum;
 	__le16 partitionNum;
 	__le16 packetLength;
@@ -146,7 +146,7 @@ struct metadataPartitionMap {
 	uint8_t		partitionMapType;
 	uint8_t		partitionMapLength;
 	uint8_t		reserved1[2];
-	regid		partIdent;
+	struct regid	partIdent;
 	__le16		volSeqNum;
 	__le16		partitionNum;
 	__le32		metadataFileLoc;
@@ -161,7 +161,7 @@ struct metadataPartitionMap {
 /* Virtual Allocation Table (UDF 1.5 2.2.10) */
 struct virtualAllocationTable15 {
 	__le32		VirtualSector[0];
-	regid		vatIdent;
+	struct regid	vatIdent;
 	__le32		previousVATICBLoc;
 } __attribute__ ((packed));
 
@@ -192,8 +192,8 @@ struct sparingEntry {
 } __attribute__ ((packed));
 
 struct sparingTable {
-	tag 		descTag;
-	regid		sparingIdent;
+	struct tag	descTag;
+	struct regid	sparingIdent;
 	__le16		reallocationTableLen;
 	__le16		reserved;
 	__le32		sequenceNum;
@@ -206,7 +206,7 @@ struct sparingTable {
 #define ICBTAG_FILE_TYPE_MIRROR		0xFB
 #define ICBTAG_FILE_TYPE_BITMAP		0xFC
 
-/* struct long_ad ICB - ADImpUse (UDF 2.50 2.2.4.3) */
+/* struct long_ad ICB - ADImpUse (UDF 2.50 2.2.4.3) */
 struct allocDescImpUse {
 	__le16		flags;
 	uint8_t		impUse[4];
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index 96dfd20..4b540ee 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -273,7 +273,7 @@ static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block,
 {
 	struct super_block *sb = inode->i_sb;
 	struct udf_part_map *map;
-	kernel_lb_addr eloc;
+	struct kernel_lb_addr eloc;
 	uint32_t elen;
 	sector_t ext_offset;
 	struct extent_position epos = {};
diff --git a/fs/udf/super.c b/fs/udf/super.c
index e25e701..72348cc 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -81,16 +81,13 @@ static char error_buf[1024];
 /* These are the "meat" - everything else is stuffing */
 static int udf_fill_super(struct super_block *, void *, int);
 static void udf_put_super(struct super_block *);
-static void udf_write_super(struct super_block *);
+static int udf_sync_fs(struct super_block *, int);
 static int udf_remount_fs(struct super_block *, int *, char *);
-static int udf_check_valid(struct super_block *, int, int);
-static int udf_vrs(struct super_block *sb, int silent);
-static void udf_load_logicalvolint(struct super_block *, kernel_extent_ad);
-static void udf_find_anchor(struct super_block *);
-static int udf_find_fileset(struct super_block *, kernel_lb_addr *,
-			    kernel_lb_addr *);
+static void udf_load_logicalvolint(struct super_block *, struct kernel_extent_ad);
+static int udf_find_fileset(struct super_block *, struct kernel_lb_addr *,
+			    struct kernel_lb_addr *);
 static void udf_load_fileset(struct super_block *, struct buffer_head *,
-			     kernel_lb_addr *);
+			     struct kernel_lb_addr *);
 static void udf_open_lvid(struct super_block *);
 static void udf_close_lvid(struct super_block *);
 static unsigned int udf_count_free(struct super_block *);
@@ -181,7 +178,7 @@ static const struct super_operations udf_sb_ops = {
 	.delete_inode	= udf_delete_inode,
 	.clear_inode	= udf_clear_inode,
 	.put_super	= udf_put_super,
-	.write_super	= udf_write_super,
+	.sync_fs	= udf_sync_fs,
 	.statfs		= udf_statfs,
 	.remount_fs	= udf_remount_fs,
 	.show_options	= udf_show_options,
@@ -201,6 +198,8 @@ struct udf_options {
 	mode_t umask;
 	gid_t gid;
 	uid_t uid;
+	mode_t fmode;
+	mode_t dmode;
 	struct nls_table *nls_map;
 };
 
@@ -258,7 +257,7 @@ static int udf_show_options(struct seq_file *seq, struct vfsmount *mnt)
 
 	if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT))
 		seq_puts(seq, ",nostrict");
-	if (sb->s_blocksize != UDF_DEFAULT_BLOCKSIZE)
+	if (UDF_QUERY_FLAG(sb, UDF_FLAG_BLOCKSIZE_SET))
 		seq_printf(seq, ",bs=%lu", sb->s_blocksize);
 	if (UDF_QUERY_FLAG(sb, UDF_FLAG_UNHIDE))
 		seq_puts(seq, ",unhide");
@@ -282,18 +281,16 @@ static int udf_show_options(struct seq_file *seq, struct vfsmount *mnt)
 		seq_printf(seq, ",gid=%u", sbi->s_gid);
 	if (sbi->s_umask != 0)
 		seq_printf(seq, ",umask=%o", sbi->s_umask);
+	if (sbi->s_fmode != UDF_INVALID_MODE)
+		seq_printf(seq, ",mode=%o", sbi->s_fmode);
+	if (sbi->s_dmode != UDF_INVALID_MODE)
+		seq_printf(seq, ",dmode=%o", sbi->s_dmode);
 	if (UDF_QUERY_FLAG(sb, UDF_FLAG_SESSION_SET))
 		seq_printf(seq, ",session=%u", sbi->s_session);
 	if (UDF_QUERY_FLAG(sb, UDF_FLAG_LASTBLOCK_SET))
 		seq_printf(seq, ",lastblock=%u", sbi->s_last_block);
-	/*
-	 * s_anchor[2] could be zeroed out in case there is no anchor
-	 * in the specified block, but then the "anchor=N" option
-	 * originally given by the user wasn't effective, so it's OK
-	 * if we don't show it.
-	 */
-	if (sbi->s_anchor[2] != 0)
-		seq_printf(seq, ",anchor=%u", sbi->s_anchor[2]);
+	if (sbi->s_anchor != 0)
+		seq_printf(seq, ",anchor=%u", sbi->s_anchor);
 	/*
 	 * volume, partition, fileset and rootdir seem to be ignored
 	 * currently
@@ -317,6 +314,8 @@ static int udf_show_options(struct seq_file *seq, struct vfsmount *mnt)
  *
  *	gid=		Set the default group.
  *	umask=		Set the default umask.
+ *	mode=		Set the default file permissions.
+ *	dmode=		Set the default directory permissions.
  *	uid=		Set the default user.
  *	bs=		Set the block size.
  *	unhide		Show otherwise hidden files.
@@ -366,7 +365,8 @@ enum {
 	Opt_gid, Opt_uid, Opt_umask, Opt_session, Opt_lastblock,
 	Opt_anchor, Opt_volume, Opt_partition, Opt_fileset,
 	Opt_rootdir, Opt_utf8, Opt_iocharset,
-	Opt_err, Opt_uforget, Opt_uignore, Opt_gforget, Opt_gignore
+	Opt_err, Opt_uforget, Opt_uignore, Opt_gforget, Opt_gignore,
+	Opt_fmode, Opt_dmode
 };
 
 static const match_table_t tokens = {
@@ -395,6 +395,8 @@ static const match_table_t tokens = {
 	{Opt_rootdir,	"rootdir=%u"},
 	{Opt_utf8,	"utf8"},
 	{Opt_iocharset,	"iocharset=%s"},
+	{Opt_fmode,     "mode=%o"},
+	{Opt_dmode,     "dmode=%o"},
 	{Opt_err,	NULL}
 };
 
@@ -405,7 +407,6 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
 	int option;
 
 	uopt->novrs = 0;
-	uopt->blocksize = UDF_DEFAULT_BLOCKSIZE;
 	uopt->partition = 0xFFFF;
 	uopt->session = 0xFFFFFFFF;
 	uopt->lastblock = 0;
@@ -428,10 +429,12 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
 		switch (token) {
 		case Opt_novrs:
 			uopt->novrs = 1;
+			break;
 		case Opt_bs:
 			if (match_int(&args[0], &option))
 				return 0;
 			uopt->blocksize = option;
+			uopt->flags |= (1 << UDF_FLAG_BLOCKSIZE_SET);
 			break;
 		case Opt_unhide:
 			uopt->flags |= (1 << UDF_FLAG_UNHIDE);
@@ -531,6 +534,16 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
 		case Opt_gforget:
 			uopt->flags |= (1 << UDF_FLAG_GID_FORGET);
 			break;
+		case Opt_fmode:
+			if (match_octal(args, &option))
+				return 0;
+			uopt->fmode = option & 0777;
+			break;
+		case Opt_dmode:
+			if (match_octal(args, &option))
+				return 0;
+			uopt->dmode = option & 0777;
+			break;
 		default:
 			printk(KERN_ERR "udf: bad mount option \"%s\" "
 			       "or missing value\n", p);
@@ -540,17 +553,6 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
 	return 1;
 }
 
-static void udf_write_super(struct super_block *sb)
-{
-	lock_kernel();
-
-	if (!(sb->s_flags & MS_RDONLY))
-		udf_open_lvid(sb);
-	sb->s_dirt = 0;
-
-	unlock_kernel();
-}
-
 static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
 {
 	struct udf_options uopt;
@@ -560,6 +562,8 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
 	uopt.uid   = sbi->s_uid;
 	uopt.gid   = sbi->s_gid;
 	uopt.umask = sbi->s_umask;
+	uopt.fmode = sbi->s_fmode;
+	uopt.dmode = sbi->s_dmode;
 
 	if (!udf_parse_options(options, &uopt, true))
 		return -EINVAL;
@@ -568,6 +572,8 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
 	sbi->s_uid   = uopt.uid;
 	sbi->s_gid   = uopt.gid;
 	sbi->s_umask = uopt.umask;
+	sbi->s_fmode = uopt.fmode;
+	sbi->s_dmode = uopt.dmode;
 
 	if (sbi->s_lvid_bh) {
 		int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev);
@@ -585,22 +591,19 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
 	return 0;
 }
 
-static int udf_vrs(struct super_block *sb, int silent)
+/* Check Volume Structure Descriptors (ECMA 167 2/9.1) */
+/* We also check any "CD-ROM Volume Descriptor Set" (ECMA 167 2/8.3.1) */
+static loff_t udf_check_vsd(struct super_block *sb)
 {
 	struct volStructDesc *vsd = NULL;
 	loff_t sector = 32768;
 	int sectorsize;
 	struct buffer_head *bh = NULL;
-	int iso9660 = 0;
 	int nsr02 = 0;
 	int nsr03 = 0;
 	struct udf_sb_info *sbi;
 
-	/* Block size must be a multiple of 512 */
-	if (sb->s_blocksize & 511)
-		return 0;
 	sbi = UDF_SB(sb);
-
 	if (sb->s_blocksize < sizeof(struct volStructDesc))
 		sectorsize = sizeof(struct volStructDesc);
 	else
@@ -627,7 +630,6 @@ static int udf_vrs(struct super_block *sb, int silent)
 			break;
 		} else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001,
 				    VSD_STD_ID_LEN)) {
-			iso9660 = sector;
 			switch (vsd->structType) {
 			case 0:
 				udf_debug("ISO9660 Boot Record found\n");
@@ -679,139 +681,9 @@ static int udf_vrs(struct super_block *sb, int silent)
 		return 0;
 }
 
-/*
- * Check whether there is an anchor block in the given block
- */
-static int udf_check_anchor_block(struct super_block *sb, sector_t block)
-{
-	struct buffer_head *bh;
-	uint16_t ident;
-
-	if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV) &&
-	    udf_fixed_to_variable(block) >=
-	    sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits)
-		return 0;
-
-	bh = udf_read_tagged(sb, block, block, &ident);
-	if (!bh)
-		return 0;
-	brelse(bh);
-
-	return ident == TAG_IDENT_AVDP;
-}
-
-/* Search for an anchor volume descriptor pointer */
-static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock)
-{
-	sector_t last[6];
-	int i;
-	struct udf_sb_info *sbi = UDF_SB(sb);
-
-	last[0] = lastblock;
-	last[1] = last[0] - 1;
-	last[2] = last[0] + 1;
-	last[3] = last[0] - 2;
-	last[4] = last[0] - 150;
-	last[5] = last[0] - 152;
-
-	/*  according to spec, anchor is in either:
-	 *     block 256
-	 *     lastblock-256
-	 *     lastblock
-	 *  however, if the disc isn't closed, it could be 512 */
-
-	for (i = 0; i < ARRAY_SIZE(last); i++) {
-		if (last[i] < 0)
-			continue;
-		if (last[i] >= sb->s_bdev->bd_inode->i_size >>
-				sb->s_blocksize_bits)
-			continue;
-
-		if (udf_check_anchor_block(sb, last[i])) {
-			sbi->s_anchor[0] = last[i];
-			sbi->s_anchor[1] = last[i] - 256;
-			return last[i];
-		}
-
-		if (last[i] < 256)
-			continue;
-
-		if (udf_check_anchor_block(sb, last[i] - 256)) {
-			sbi->s_anchor[1] = last[i] - 256;
-			return last[i];
-		}
-	}
-
-	if (udf_check_anchor_block(sb, sbi->s_session + 256)) {
-		sbi->s_anchor[0] = sbi->s_session + 256;
-		return last[0];
-	}
-	if (udf_check_anchor_block(sb, sbi->s_session + 512)) {
-		sbi->s_anchor[0] = sbi->s_session + 512;
-		return last[0];
-	}
-	return 0;
-}
-
-/*
- * Find an anchor volume descriptor. The function expects sbi->s_lastblock to
- * be the last block on the media.
- *
- * Return 1 if not found, 0 if ok
- *
- */
-static void udf_find_anchor(struct super_block *sb)
-{
-	sector_t lastblock;
-	struct buffer_head *bh = NULL;
-	uint16_t ident;
-	int i;
-	struct udf_sb_info *sbi = UDF_SB(sb);
-
-	lastblock = udf_scan_anchors(sb, sbi->s_last_block);
-	if (lastblock)
-		goto check_anchor;
-
-	/* No anchor found? Try VARCONV conversion of block numbers */
-	UDF_SET_FLAG(sb, UDF_FLAG_VARCONV);
-	/* Firstly, we try to not convert number of the last block */
-	lastblock = udf_scan_anchors(sb,
-				udf_variable_to_fixed(sbi->s_last_block));
-	if (lastblock)
-		goto check_anchor;
-
-	/* Secondly, we try with converted number of the last block */
-	lastblock = udf_scan_anchors(sb, sbi->s_last_block);
-	if (!lastblock) {
-		/* VARCONV didn't help. Clear it. */
-		UDF_CLEAR_FLAG(sb, UDF_FLAG_VARCONV);
-	}
-
-check_anchor:
-	/*
-	 * Check located anchors and the anchor block supplied via
-	 * mount options
-	 */
-	for (i = 0; i < ARRAY_SIZE(sbi->s_anchor); i++) {
-		if (!sbi->s_anchor[i])
-			continue;
-		bh = udf_read_tagged(sb, sbi->s_anchor[i],
-					sbi->s_anchor[i], &ident);
-		if (!bh)
-			sbi->s_anchor[i] = 0;
-		else {
-			brelse(bh);
-			if (ident != TAG_IDENT_AVDP)
-				sbi->s_anchor[i] = 0;
-		}
-	}
-
-	sbi->s_last_block = lastblock;
-}
-
 static int udf_find_fileset(struct super_block *sb,
-			    kernel_lb_addr *fileset,
-			    kernel_lb_addr *root)
+			    struct kernel_lb_addr *fileset,
+			    struct kernel_lb_addr *root)
 {
 	struct buffer_head *bh = NULL;
 	long lastblock;
@@ -820,7 +692,7 @@ static int udf_find_fileset(struct super_block *sb,
 
 	if (fileset->logicalBlockNum != 0xFFFFFFFF ||
 	    fileset->partitionReferenceNum != 0xFFFF) {
-		bh = udf_read_ptagged(sb, *fileset, 0, &ident);
+		bh = udf_read_ptagged(sb, fileset, 0, &ident);
 
 		if (!bh) {
 			return 1;
@@ -834,7 +706,7 @@ static int udf_find_fileset(struct super_block *sb,
 	sbi = UDF_SB(sb);
 	if (!bh) {
 		/* Search backwards through the partitions */
-		kernel_lb_addr newfileset;
+		struct kernel_lb_addr newfileset;
 
 /* --> cvg: FIXME - is it reasonable? */
 		return 1;
@@ -850,7 +722,7 @@ static int udf_find_fileset(struct super_block *sb,
 			newfileset.logicalBlockNum = 0;
 
 			do {
-				bh = udf_read_ptagged(sb, newfileset, 0,
+				bh = udf_read_ptagged(sb, &newfileset, 0,
 						      &ident);
 				if (!bh) {
 					newfileset.logicalBlockNum++;
@@ -902,14 +774,23 @@ static int udf_find_fileset(struct super_block *sb,
 static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
 {
 	struct primaryVolDesc *pvoldesc;
-	struct ustr instr;
-	struct ustr outstr;
+	struct ustr *instr, *outstr;
 	struct buffer_head *bh;
 	uint16_t ident;
+	int ret = 1;
+
+	instr = kmalloc(sizeof(struct ustr), GFP_NOFS);
+	if (!instr)
+		return 1;
+
+	outstr = kmalloc(sizeof(struct ustr), GFP_NOFS);
+	if (!outstr)
+		goto out1;
 
 	bh = udf_read_tagged(sb, block, block, &ident);
 	if (!bh)
-		return 1;
+		goto out2;
+
 	BUG_ON(ident != TAG_IDENT_PVD);
 
 	pvoldesc = (struct primaryVolDesc *)bh->b_data;
@@ -917,7 +798,7 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
 	if (udf_disk_stamp_to_time(&UDF_SB(sb)->s_record_time,
 			      pvoldesc->recordingDateAndTime)) {
 #ifdef UDFFS_DEBUG
-		timestamp *ts = &pvoldesc->recordingDateAndTime;
+		struct timestamp *ts = &pvoldesc->recordingDateAndTime;
 		udf_debug("recording time %04u/%02u/%02u"
 			  " %02u:%02u (%x)\n",
 			  le16_to_cpu(ts->year), ts->month, ts->day, ts->hour,
@@ -925,20 +806,25 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
 #endif
 	}
 
-	if (!udf_build_ustr(&instr, pvoldesc->volIdent, 32))
-		if (udf_CS0toUTF8(&outstr, &instr)) {
-			strncpy(UDF_SB(sb)->s_volume_ident, outstr.u_name,
-				outstr.u_len > 31 ? 31 : outstr.u_len);
+	if (!udf_build_ustr(instr, pvoldesc->volIdent, 32))
+		if (udf_CS0toUTF8(outstr, instr)) {
+			strncpy(UDF_SB(sb)->s_volume_ident, outstr->u_name,
+				outstr->u_len > 31 ? 31 : outstr->u_len);
 			udf_debug("volIdent[] = '%s'\n",
 					UDF_SB(sb)->s_volume_ident);
 		}
 
-	if (!udf_build_ustr(&instr, pvoldesc->volSetIdent, 128))
-		if (udf_CS0toUTF8(&outstr, &instr))
-			udf_debug("volSetIdent[] = '%s'\n", outstr.u_name);
+	if (!udf_build_ustr(instr, pvoldesc->volSetIdent, 128))
+		if (udf_CS0toUTF8(outstr, instr))
+			udf_debug("volSetIdent[] = '%s'\n", outstr->u_name);
 
 	brelse(bh);
-	return 0;
+	ret = 0;
+out2:
+	kfree(outstr);
+out1:
+	kfree(instr);
+	return ret;
 }
 
 static int udf_load_metadata_files(struct super_block *sb, int partition)
@@ -946,7 +832,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
 	struct udf_sb_info *sbi = UDF_SB(sb);
 	struct udf_part_map *map;
 	struct udf_meta_data *mdata;
-	kernel_lb_addr addr;
+	struct kernel_lb_addr addr;
 	int fe_error = 0;
 
 	map = &sbi->s_partmaps[partition];
@@ -959,7 +845,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
 	udf_debug("Metadata file location: block = %d part = %d\n",
 			  addr.logicalBlockNum, addr.partitionReferenceNum);
 
-	mdata->s_metadata_fe = udf_iget(sb, addr);
+	mdata->s_metadata_fe = udf_iget(sb, &addr);
 
 	if (mdata->s_metadata_fe == NULL) {
 		udf_warning(sb, __func__, "metadata inode efe not found, "
@@ -981,7 +867,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
 	udf_debug("Mirror metadata file location: block = %d part = %d\n",
 			  addr.logicalBlockNum, addr.partitionReferenceNum);
 
-	mdata->s_mirror_fe = udf_iget(sb, addr);
+	mdata->s_mirror_fe = udf_iget(sb, &addr);
 
 	if (mdata->s_mirror_fe == NULL) {
 		if (fe_error) {
@@ -1013,7 +899,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
 		udf_debug("Bitmap file location: block = %d part = %d\n",
 			addr.logicalBlockNum, addr.partitionReferenceNum);
 
-		mdata->s_bitmap_fe = udf_iget(sb, addr);
+		mdata->s_bitmap_fe = udf_iget(sb, &addr);
 
 		if (mdata->s_bitmap_fe == NULL) {
 			if (sb->s_flags & MS_RDONLY)
@@ -1037,7 +923,7 @@ error_exit:
 }
 
 static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh,
-			     kernel_lb_addr *root)
+			     struct kernel_lb_addr *root)
 {
 	struct fileSetDesc *fset;
 
@@ -1119,13 +1005,13 @@ static int udf_fill_partdesc_info(struct super_block *sb,
 
 	phd = (struct partitionHeaderDesc *)p->partitionContentsUse;
 	if (phd->unallocSpaceTable.extLength) {
-		kernel_lb_addr loc = {
+		struct kernel_lb_addr loc = {
 			.logicalBlockNum = le32_to_cpu(
 				phd->unallocSpaceTable.extPosition),
 			.partitionReferenceNum = p_index,
 		};
 
-		map->s_uspace.s_table = udf_iget(sb, loc);
+		map->s_uspace.s_table = udf_iget(sb, &loc);
 		if (!map->s_uspace.s_table) {
 			udf_debug("cannot load unallocSpaceTable (part %d)\n",
 					p_index);
@@ -1154,13 +1040,13 @@ static int udf_fill_partdesc_info(struct super_block *sb,
 		udf_debug("partitionIntegrityTable (part %d)\n", p_index);
 
 	if (phd->freedSpaceTable.extLength) {
-		kernel_lb_addr loc = {
+		struct kernel_lb_addr loc = {
 			.logicalBlockNum = le32_to_cpu(
 				phd->freedSpaceTable.extPosition),
 			.partitionReferenceNum = p_index,
 		};
 
-		map->s_fspace.s_table = udf_iget(sb, loc);
+		map->s_fspace.s_table = udf_iget(sb, &loc);
 		if (!map->s_fspace.s_table) {
 			udf_debug("cannot load freedSpaceTable (part %d)\n",
 				p_index);
@@ -1192,7 +1078,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
 {
 	struct udf_sb_info *sbi = UDF_SB(sb);
 	struct udf_part_map *map = &sbi->s_partmaps[p_index];
-	kernel_lb_addr ino;
+	struct kernel_lb_addr ino;
 	struct buffer_head *bh = NULL;
 	struct udf_inode_info *vati;
 	uint32_t pos;
@@ -1201,7 +1087,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
 	/* VAT file entry is in the last recorded block */
 	ino.partitionReferenceNum = type1_index;
 	ino.logicalBlockNum = sbi->s_last_block - map->s_partition_root;
-	sbi->s_vat_inode = udf_iget(sb, ino);
+	sbi->s_vat_inode = udf_iget(sb, &ino);
 	if (!sbi->s_vat_inode)
 		return 1;
 
@@ -1322,7 +1208,7 @@ out_bh:
 }
 
 static int udf_load_logicalvol(struct super_block *sb, sector_t block,
-			       kernel_lb_addr *fileset)
+			       struct kernel_lb_addr *fileset)
 {
 	struct logicalVolDesc *lvd;
 	int i, j, offset;
@@ -1471,7 +1357,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
 	}
 
 	if (fileset) {
-		long_ad *la = (long_ad *)&(lvd->logicalVolContentsUse[0]);
+		struct long_ad *la = (struct long_ad *)&(lvd->logicalVolContentsUse[0]);
 
 		*fileset = lelb_to_cpu(la->extLocation);
 		udf_debug("FileSet found in LogicalVolDesc at block=%d, "
@@ -1490,7 +1376,7 @@ out_bh:
  * udf_load_logicalvolint
  *
  */
-static void udf_load_logicalvolint(struct super_block *sb, kernel_extent_ad loc)
+static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_ad loc)
 {
 	struct buffer_head *bh = NULL;
 	uint16_t ident;
@@ -1533,7 +1419,7 @@ static void udf_load_logicalvolint(struct super_block *sb, kernel_extent_ad loc)
  *	Written, tested, and released.
  */
 static noinline int udf_process_sequence(struct super_block *sb, long block,
-				long lastblock, kernel_lb_addr *fileset)
+				long lastblock, struct kernel_lb_addr *fileset)
 {
 	struct buffer_head *bh = NULL;
 	struct udf_vds_record vds[VDS_POS_LENGTH];
@@ -1655,85 +1541,199 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
 	return 0;
 }
 
+static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh,
+			     struct kernel_lb_addr *fileset)
+{
+	struct anchorVolDescPtr *anchor;
+	long main_s, main_e, reserve_s, reserve_e;
+	struct udf_sb_info *sbi;
+
+	sbi = UDF_SB(sb);
+	anchor = (struct anchorVolDescPtr *)bh->b_data;
+
+	/* Locate the main sequence */
+	main_s = le32_to_cpu(anchor->mainVolDescSeqExt.extLocation);
+	main_e = le32_to_cpu(anchor->mainVolDescSeqExt.extLength);
+	main_e = main_e >> sb->s_blocksize_bits;
+	main_e += main_s;
+
+	/* Locate the reserve sequence */
+	reserve_s = le32_to_cpu(anchor->reserveVolDescSeqExt.extLocation);
+	reserve_e = le32_to_cpu(anchor->reserveVolDescSeqExt.extLength);
+	reserve_e = reserve_e >> sb->s_blocksize_bits;
+	reserve_e += reserve_s;
+
+	/* Process the main & reserve sequences */
+	/* responsible for finding the PartitionDesc(s) */
+	if (!udf_process_sequence(sb, main_s, main_e, fileset))
+		return 1;
+	return !udf_process_sequence(sb, reserve_s, reserve_e, fileset);
+}
+
 /*
- * udf_check_valid()
+ * Check whether there is an anchor block in the given block and
+ * load Volume Descriptor Sequence if so.
  */
-static int udf_check_valid(struct super_block *sb, int novrs, int silent)
+static int udf_check_anchor_block(struct super_block *sb, sector_t block,
+				  struct kernel_lb_addr *fileset)
 {
-	long block;
-	struct udf_sb_info *sbi = UDF_SB(sb);
+	struct buffer_head *bh;
+	uint16_t ident;
+	int ret;
 
-	if (novrs) {
-		udf_debug("Validity check skipped because of novrs option\n");
+	if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV) &&
+	    udf_fixed_to_variable(block) >=
+	    sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits)
+		return 0;
+
+	bh = udf_read_tagged(sb, block, block, &ident);
+	if (!bh)
+		return 0;
+	if (ident != TAG_IDENT_AVDP) {
+		brelse(bh);
 		return 0;
 	}
-	/* Check that it is NSR02 compliant */
-	/* Process any "CD-ROM Volume Descriptor Set" (ECMA 167 2/8.3.1) */
-	block = udf_vrs(sb, silent);
-	if (block == -1)
-		udf_debug("Failed to read byte 32768. Assuming open "
-			  "disc. Skipping validity check\n");
-	if (block && !sbi->s_last_block)
-		sbi->s_last_block = udf_get_last_block(sb);
-	return !block;
+	ret = udf_load_sequence(sb, bh, fileset);
+	brelse(bh);
+	return ret;
 }
 
-static int udf_load_sequence(struct super_block *sb, kernel_lb_addr *fileset)
+/* Search for an anchor volume descriptor pointer */
+static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock,
+				 struct kernel_lb_addr *fileset)
 {
-	struct anchorVolDescPtr *anchor;
-	uint16_t ident;
-	struct buffer_head *bh;
-	long main_s, main_e, reserve_s, reserve_e;
+	sector_t last[6];
 	int i;
-	struct udf_sb_info *sbi;
-
-	if (!sb)
-		return 1;
-	sbi = UDF_SB(sb);
+	struct udf_sb_info *sbi = UDF_SB(sb);
+	int last_count = 0;
 
-	for (i = 0; i < ARRAY_SIZE(sbi->s_anchor); i++) {
-		if (!sbi->s_anchor[i])
+	/* First try user provided anchor */
+	if (sbi->s_anchor) {
+		if (udf_check_anchor_block(sb, sbi->s_anchor, fileset))
+			return lastblock;
+	}
+	/*
+	 * According to the spec, the anchor is in one of:
+	 *     block 256
+	 *     lastblock-256
+	 *     lastblock
+	 * However, if the disc isn't closed, it could be at block 512.
+	 */
+	if (udf_check_anchor_block(sb, sbi->s_session + 256, fileset))
+		return lastblock;
+	/*
+	 * The trouble is which block is the last one. Drives often misreport
+	 * this so we try various possibilities.
+	 */
+	last[last_count++] = lastblock;
+	if (lastblock >= 1)
+		last[last_count++] = lastblock - 1;
+	last[last_count++] = lastblock + 1;
+	if (lastblock >= 2)
+		last[last_count++] = lastblock - 2;
+	if (lastblock >= 150)
+		last[last_count++] = lastblock - 150;
+	if (lastblock >= 152)
+		last[last_count++] = lastblock - 152;
+
+	for (i = 0; i < last_count; i++) {
+		if (last[i] >= sb->s_bdev->bd_inode->i_size >>
+				sb->s_blocksize_bits)
 			continue;
-
-		bh = udf_read_tagged(sb, sbi->s_anchor[i], sbi->s_anchor[i],
-				     &ident);
-		if (!bh)
+		if (udf_check_anchor_block(sb, last[i], fileset))
+			return last[i];
+		if (last[i] < 256)
 			continue;
+		if (udf_check_anchor_block(sb, last[i] - 256, fileset))
+			return last[i];
+	}
 
-		anchor = (struct anchorVolDescPtr *)bh->b_data;
+	/* Finally try block 512 in case media is open */
+	if (udf_check_anchor_block(sb, sbi->s_session + 512, fileset))
+		return last[0];
+	return 0;
+}
 
-		/* Locate the main sequence */
-		main_s = le32_to_cpu(anchor->mainVolDescSeqExt.extLocation);
-		main_e = le32_to_cpu(anchor->mainVolDescSeqExt.extLength);
-		main_e = main_e >> sb->s_blocksize_bits;
-		main_e += main_s;
+/*
+ * Find an anchor volume descriptor and load Volume Descriptor Sequence from
+ * area specified by it. The function expects sbi->s_last_block to be the last
+ * block on the media.
+ *
+ * Return 1 if ok, 0 if not found.
+ *
+ */
+static int udf_find_anchor(struct super_block *sb,
+			   struct kernel_lb_addr *fileset)
+{
+	sector_t lastblock;
+	struct udf_sb_info *sbi = UDF_SB(sb);
 
-		/* Locate the reserve sequence */
-		reserve_s = le32_to_cpu(
-				anchor->reserveVolDescSeqExt.extLocation);
-		reserve_e = le32_to_cpu(
-				anchor->reserveVolDescSeqExt.extLength);
-		reserve_e = reserve_e >> sb->s_blocksize_bits;
-		reserve_e += reserve_s;
+	lastblock = udf_scan_anchors(sb, sbi->s_last_block, fileset);
+	if (lastblock)
+		goto out;
 
-		brelse(bh);
+	/* No anchor found? Try VARCONV conversion of block numbers */
+	UDF_SET_FLAG(sb, UDF_FLAG_VARCONV);
+	/* First, try without converting the number of the last block */
+	lastblock = udf_scan_anchors(sb,
+				udf_variable_to_fixed(sbi->s_last_block),
+				fileset);
+	if (lastblock)
+		goto out;
 
-		/* Process the main & reserve sequences */
-		/* responsible for finding the PartitionDesc(s) */
-		if (!(udf_process_sequence(sb, main_s, main_e,
-					   fileset) &&
-		      udf_process_sequence(sb, reserve_s, reserve_e,
-					   fileset)))
-			break;
+	/* Second, try with the converted number of the last block */
+	lastblock = udf_scan_anchors(sb, sbi->s_last_block, fileset);
+	if (!lastblock) {
+		/* VARCONV didn't help. Clear it. */
+		UDF_CLEAR_FLAG(sb, UDF_FLAG_VARCONV);
+		return 0;
 	}
+out:
+	sbi->s_last_block = lastblock;
+	return 1;
+}
 
-	if (i == ARRAY_SIZE(sbi->s_anchor)) {
-		udf_debug("No Anchor block found\n");
-		return 1;
+/*
+ * Check Volume Structure Descriptor, find Anchor block and load Volume
+ * Descriptor Sequence
+ */
+static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
+			int silent, struct kernel_lb_addr *fileset)
+{
+	struct udf_sb_info *sbi = UDF_SB(sb);
+	loff_t nsr_off;
+
+	if (!sb_set_blocksize(sb, uopt->blocksize)) {
+		if (!silent)
+			printk(KERN_WARNING "UDF-fs: Bad block size\n");
+		return 0;
+	}
+	sbi->s_last_block = uopt->lastblock;
+	if (!uopt->novrs) {
+		/* Check that it is NSR02 compliant */
+		nsr_off = udf_check_vsd(sb);
+		if (!nsr_off) {
+			if (!silent)
+				printk(KERN_WARNING "UDF-fs: No VRS found\n");
+			return 0;
+		}
+		if (nsr_off == -1)
+			udf_debug("Failed to read byte 32768. Assuming open "
+				  "disc. Skipping validity check\n");
+		if (!sbi->s_last_block)
+			sbi->s_last_block = udf_get_last_block(sb);
+	} else {
+		udf_debug("Validity check skipped because of novrs option\n");
 	}
-	udf_debug("Using anchor in block %d\n", sbi->s_anchor[i]);
 
-	return 0;
+	/* Look for anchor block and load Volume Descriptor Sequence */
+	sbi->s_anchor = uopt->anchor;
+	if (!udf_find_anchor(sb, fileset)) {
+		if (!silent)
+			printk(KERN_WARNING "UDF-fs: No anchor found\n");
+		return 0;
+	}
+	return 1;
 }
 
 static void udf_open_lvid(struct super_block *sb)
@@ -1742,9 +1742,9 @@ static void udf_open_lvid(struct super_block *sb)
 	struct buffer_head *bh = sbi->s_lvid_bh;
 	struct logicalVolIntegrityDesc *lvid;
 	struct logicalVolIntegrityDescImpUse *lvidiu;
+
 	if (!bh)
 		return;
-
 	lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
 	lvidiu = udf_sb_lvidiu(sbi);
 
@@ -1752,14 +1752,15 @@ static void udf_open_lvid(struct super_block *sb)
 	lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
 	udf_time_to_disk_stamp(&lvid->recordingDateAndTime,
 				CURRENT_TIME);
-	lvid->integrityType = LVID_INTEGRITY_TYPE_OPEN;
+	lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN);
 
 	lvid->descTag.descCRC = cpu_to_le16(
-		crc_itu_t(0, (char *)lvid + sizeof(tag),
+		crc_itu_t(0, (char *)lvid + sizeof(struct tag),
 			le16_to_cpu(lvid->descTag.descCRCLength)));
 
 	lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
 	mark_buffer_dirty(bh);
+	sbi->s_lvid_dirty = 0;
 }
 
 static void udf_close_lvid(struct super_block *sb)
@@ -1773,10 +1774,6 @@ static void udf_close_lvid(struct super_block *sb)
 		return;
 
 	lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
-
-	if (lvid->integrityType != LVID_INTEGRITY_TYPE_OPEN)
-		return;
-
 	lvidiu = udf_sb_lvidiu(sbi);
 	lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
 	lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
@@ -1790,11 +1787,12 @@ static void udf_close_lvid(struct super_block *sb)
 	lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE);
 
 	lvid->descTag.descCRC = cpu_to_le16(
-			crc_itu_t(0, (char *)lvid + sizeof(tag),
+			crc_itu_t(0, (char *)lvid + sizeof(struct tag),
 				le16_to_cpu(lvid->descTag.descCRCLength)));
 
 	lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
 	mark_buffer_dirty(bh);
+	sbi->s_lvid_dirty = 0;
 }
 
 static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
@@ -1846,15 +1844,18 @@ static void udf_free_partition(struct udf_part_map *map)
 static int udf_fill_super(struct super_block *sb, void *options, int silent)
 {
 	int i;
+	int ret;
 	struct inode *inode = NULL;
 	struct udf_options uopt;
-	kernel_lb_addr rootdir, fileset;
+	struct kernel_lb_addr rootdir, fileset;
 	struct udf_sb_info *sbi;
 
 	uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT);
 	uopt.uid = -1;
 	uopt.gid = -1;
 	uopt.umask = 0;
+	uopt.fmode = UDF_INVALID_MODE;
+	uopt.dmode = UDF_INVALID_MODE;
 
 	sbi = kzalloc(sizeof(struct udf_sb_info), GFP_KERNEL);
 	if (!sbi)
@@ -1892,15 +1893,10 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	sbi->s_uid = uopt.uid;
 	sbi->s_gid = uopt.gid;
 	sbi->s_umask = uopt.umask;
+	sbi->s_fmode = uopt.fmode;
+	sbi->s_dmode = uopt.dmode;
 	sbi->s_nls_map = uopt.nls_map;
 
-	/* Set the block size for all transfers */
-	if (!sb_min_blocksize(sb, uopt.blocksize)) {
-		udf_debug("Bad block size (%d)\n", uopt.blocksize);
-		printk(KERN_ERR "udf: bad block size (%d)\n", uopt.blocksize);
-		goto error_out;
-	}
-
 	if (uopt.session == 0xFFFFFFFF)
 		sbi->s_session = udf_get_last_session(sb);
 	else
@@ -1908,18 +1904,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 
 	udf_debug("Multi-session=%d\n", sbi->s_session);
 
-	sbi->s_last_block = uopt.lastblock;
-	sbi->s_anchor[0] = sbi->s_anchor[1] = 0;
-	sbi->s_anchor[2] = uopt.anchor;
-
-	if (udf_check_valid(sb, uopt.novrs, silent)) {
-		/* read volume recognition sequences */
-		printk(KERN_WARNING "UDF-fs: No VRS found\n");
-		goto error_out;
-	}
-
-	udf_find_anchor(sb);
-
 	/* Fill in the rest of the superblock */
 	sb->s_op = &udf_sb_ops;
 	sb->s_export_op = &udf_export_ops;
@@ -1928,7 +1912,21 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	sb->s_magic = UDF_SUPER_MAGIC;
 	sb->s_time_gran = 1000;
 
-	if (udf_load_sequence(sb, &fileset)) {
+	if (uopt.flags & (1 << UDF_FLAG_BLOCKSIZE_SET)) {
+		ret = udf_load_vrs(sb, &uopt, silent, &fileset);
+	} else {
+		uopt.blocksize = bdev_hardsect_size(sb->s_bdev);
+		ret = udf_load_vrs(sb, &uopt, silent, &fileset);
+		if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) {
+			if (!silent)
+				printk(KERN_NOTICE
+				       "UDF-fs: Rescanning with blocksize "
+				       "%d\n", UDF_DEFAULT_BLOCKSIZE);
+			uopt.blocksize = UDF_DEFAULT_BLOCKSIZE;
+			ret = udf_load_vrs(sb, &uopt, silent, &fileset);
+		}
+	}
+	if (!ret) {
 		printk(KERN_WARNING "UDF-fs: No partition found (1)\n");
 		goto error_out;
 	}
@@ -1978,7 +1976,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	}
 
 	if (!silent) {
-		timestamp ts;
+		struct timestamp ts;
 		udf_time_to_disk_stamp(&ts, sbi->s_record_time);
 		udf_info("UDF: Mounting volume '%s', "
 			 "timestamp %04u/%02u/%02u %02u:%02u (%x)\n",
@@ -1991,7 +1989,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	/* Assign the root inode */
 	/* assign inodes by physical block number */
 	/* perhaps it's not extensible enough, but for now ... */
-	inode = udf_iget(sb, rootdir);
+	inode = udf_iget(sb, &rootdir);
 	if (!inode) {
 		printk(KERN_ERR "UDF-fs: Error in udf_iget, block=%d, "
 				"partition=%d\n",
@@ -2081,11 +2079,31 @@ static void udf_put_super(struct super_block *sb)
 	sb->s_fs_info = NULL;
 }
 
+static int udf_sync_fs(struct super_block *sb, int wait)
+{
+	struct udf_sb_info *sbi = UDF_SB(sb);
+
+	mutex_lock(&sbi->s_alloc_mutex);
+	if (sbi->s_lvid_dirty) {
+		/*
+		 * Blockdevice will be synced later so we don't have to submit
+		 * the buffer for IO
+		 */
+		mark_buffer_dirty(sbi->s_lvid_bh);
+		sb->s_dirt = 0;
+		sbi->s_lvid_dirty = 0;
+	}
+	mutex_unlock(&sbi->s_alloc_mutex);
+
+	return 0;
+}
+
 static int udf_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct super_block *sb = dentry->d_sb;
 	struct udf_sb_info *sbi = UDF_SB(sb);
 	struct logicalVolIntegrityDescImpUse *lvidiu;
+	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
 
 	if (sbi->s_lvid_bh != NULL)
 		lvidiu = udf_sb_lvidiu(sbi);
@@ -2101,8 +2119,9 @@ static int udf_statfs(struct dentry *dentry, struct kstatfs *buf)
 					  le32_to_cpu(lvidiu->numDirs)) : 0)
 			+ buf->f_bfree;
 	buf->f_ffree = buf->f_bfree;
-	/* __kernel_fsid_t f_fsid */
 	buf->f_namelen = UDF_NAME_LEN - 2;
+	buf->f_fsid.val[0] = (u32)id;
+	buf->f_fsid.val[1] = (u32)(id >> 32);
 
 	return 0;
 }
@@ -2114,7 +2133,7 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
 	unsigned int accum = 0;
 	int index;
 	int block = 0, newblock;
-	kernel_lb_addr loc;
+	struct kernel_lb_addr loc;
 	uint32_t bytes;
 	uint8_t *ptr;
 	uint16_t ident;
@@ -2124,7 +2143,7 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
 
 	loc.logicalBlockNum = bitmap->s_extPosition;
 	loc.partitionReferenceNum = UDF_SB(sb)->s_partition;
-	bh = udf_read_ptagged(sb, loc, 0, &ident);
+	bh = udf_read_ptagged(sb, &loc, 0, &ident);
 
 	if (!bh) {
 		printk(KERN_ERR "udf: udf_count_free failed\n");
@@ -2147,7 +2166,7 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
 		bytes -= cur_bytes;
 		if (bytes) {
 			brelse(bh);
-			newblock = udf_get_lb_pblock(sb, loc, ++block);
+			newblock = udf_get_lb_pblock(sb, &loc, ++block);
 			bh = udf_tread(sb, newblock);
 			if (!bh) {
 				udf_debug("read failed\n");
@@ -2170,7 +2189,7 @@ static unsigned int udf_count_free_table(struct super_block *sb,
 {
 	unsigned int accum = 0;
 	uint32_t elen;
-	kernel_lb_addr eloc;
+	struct kernel_lb_addr eloc;
 	int8_t etype;
 	struct extent_position epos;
 
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 65e19b4..225527c 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -28,10 +28,10 @@
 #include "udf_sb.h"
 
 static void extent_trunc(struct inode *inode, struct extent_position *epos,
-			 kernel_lb_addr eloc, int8_t etype, uint32_t elen,
+			 struct kernel_lb_addr *eloc, int8_t etype, uint32_t elen,
 			 uint32_t nelen)
 {
-	kernel_lb_addr neloc = {};
+	struct kernel_lb_addr neloc = {};
 	int last_block = (elen + inode->i_sb->s_blocksize - 1) >>
 		inode->i_sb->s_blocksize_bits;
 	int first_block = (nelen + inode->i_sb->s_blocksize - 1) >>
@@ -43,12 +43,12 @@ static void extent_trunc(struct inode *inode, struct extent_position *epos,
 					last_block);
 			etype = (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30);
 		} else
-			neloc = eloc;
+			neloc = *eloc;
 		nelen = (etype << 30) | nelen;
 	}
 
 	if (elen != nelen) {
-		udf_write_aext(inode, epos, neloc, nelen, 0);
+		udf_write_aext(inode, epos, &neloc, nelen, 0);
 		if (last_block - first_block > 0) {
 			if (etype == (EXT_RECORDED_ALLOCATED >> 30))
 				mark_inode_dirty(inode);
@@ -68,7 +68,7 @@ static void extent_trunc(struct inode *inode, struct extent_position *epos,
 void udf_truncate_tail_extent(struct inode *inode)
 {
 	struct extent_position epos = {};
-	kernel_lb_addr eloc;
+	struct kernel_lb_addr eloc;
 	uint32_t elen, nelen;
 	uint64_t lbcount = 0;
 	int8_t etype = -1, netype;
@@ -83,9 +83,9 @@ void udf_truncate_tail_extent(struct inode *inode)
 		return;
 
 	if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
-		adsize = sizeof(short_ad);
+		adsize = sizeof(struct short_ad);
 	else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
-		adsize = sizeof(long_ad);
+		adsize = sizeof(struct long_ad);
 	else
 		BUG();
 
@@ -106,7 +106,7 @@ void udf_truncate_tail_extent(struct inode *inode)
 				       (unsigned)elen);
 			nelen = elen - (lbcount - inode->i_size);
 			epos.offset -= adsize;
-			extent_trunc(inode, &epos, eloc, etype, elen, nelen);
+			extent_trunc(inode, &epos, &eloc, etype, elen, nelen);
 			epos.offset += adsize;
 			if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1)
 				printk(KERN_ERR "udf_truncate_tail_extent(): "
@@ -124,7 +124,7 @@ void udf_truncate_tail_extent(struct inode *inode)
 void udf_discard_prealloc(struct inode *inode)
 {
 	struct extent_position epos = { NULL, 0, {0, 0} };
-	kernel_lb_addr eloc;
+	struct kernel_lb_addr eloc;
 	uint32_t elen;
 	uint64_t lbcount = 0;
 	int8_t etype = -1, netype;
@@ -136,9 +136,9 @@ void udf_discard_prealloc(struct inode *inode)
 		return;
 
 	if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
-		adsize = sizeof(short_ad);
+		adsize = sizeof(struct short_ad);
 	else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
-		adsize = sizeof(long_ad);
+		adsize = sizeof(struct long_ad);
 	else
 		adsize = 0;
 
@@ -152,7 +152,7 @@ void udf_discard_prealloc(struct inode *inode)
 	if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) {
 		epos.offset -= adsize;
 		lbcount -= elen;
-		extent_trunc(inode, &epos, eloc, etype, elen, 0);
+		extent_trunc(inode, &epos, &eloc, etype, elen, 0);
 		if (!epos.bh) {
 			iinfo->i_lenAlloc =
 				epos.offset -
@@ -200,7 +200,7 @@ static void udf_update_alloc_ext_desc(struct inode *inode,
 void udf_truncate_extents(struct inode *inode)
 {
 	struct extent_position epos;
-	kernel_lb_addr eloc, neloc = {};
+	struct kernel_lb_addr eloc, neloc = {};
 	uint32_t elen, nelen = 0, indirect_ext_len = 0, lenalloc;
 	int8_t etype;
 	struct super_block *sb = inode->i_sb;
@@ -210,9 +210,9 @@ void udf_truncate_extents(struct inode *inode)
 	struct udf_inode_info *iinfo = UDF_I(inode);
 
 	if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
-		adsize = sizeof(short_ad);
+		adsize = sizeof(struct short_ad);
 	else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
-		adsize = sizeof(long_ad);
+		adsize = sizeof(struct long_ad);
 	else
 		BUG();
 
@@ -221,7 +221,7 @@ void udf_truncate_extents(struct inode *inode)
 		(inode->i_size & (sb->s_blocksize - 1));
 	if (etype != -1) {
 		epos.offset -= adsize;
-		extent_trunc(inode, &epos, eloc, etype, elen, byte_offset);
+		extent_trunc(inode, &epos, &eloc, etype, elen, byte_offset);
 		epos.offset += adsize;
 		if (byte_offset)
 			lenalloc = epos.offset;
@@ -236,12 +236,12 @@ void udf_truncate_extents(struct inode *inode)
 		while ((etype = udf_current_aext(inode, &epos, &eloc,
 						 &elen, 0)) != -1) {
 			if (etype == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) {
-				udf_write_aext(inode, &epos, neloc, nelen, 0);
+				udf_write_aext(inode, &epos, &neloc, nelen, 0);
 				if (indirect_ext_len) {
 					/* We managed to free all extents in the
 					 * indirect extent - free it too */
 					BUG_ON(!epos.bh);
-					udf_free_blocks(sb, inode, epos.block,
+					udf_free_blocks(sb, inode, &epos.block,
 							0, indirect_ext_len);
 				} else if (!epos.bh) {
 					iinfo->i_lenAlloc = lenalloc;
@@ -253,7 +253,7 @@ void udf_truncate_extents(struct inode *inode)
 				epos.offset = sizeof(struct allocExtDesc);
 				epos.block = eloc;
 				epos.bh = udf_tread(sb,
-						udf_get_lb_pblock(sb, eloc, 0));
+						udf_get_lb_pblock(sb, &eloc, 0));
 				if (elen)
 					indirect_ext_len =
 						(elen + sb->s_blocksize - 1) >>
@@ -261,7 +261,7 @@ void udf_truncate_extents(struct inode *inode)
 				else
 					indirect_ext_len = 1;
 			} else {
-				extent_trunc(inode, &epos, eloc, etype,
+				extent_trunc(inode, &epos, &eloc, etype,
 					     elen, 0);
 				epos.offset += adsize;
 			}
@@ -269,7 +269,7 @@ void udf_truncate_extents(struct inode *inode)
 
 		if (indirect_ext_len) {
 			BUG_ON(!epos.bh);
-			udf_free_blocks(sb, inode, epos.block, 0,
+			udf_free_blocks(sb, inode, &epos.block, 0,
 					indirect_ext_len);
 		} else if (!epos.bh) {
 			iinfo->i_lenAlloc = lenalloc;
@@ -278,7 +278,7 @@ void udf_truncate_extents(struct inode *inode)
 			udf_update_alloc_ext_desc(inode, &epos, lenalloc);
 	} else if (inode->i_size) {
 		if (byte_offset) {
-			kernel_long_ad extent;
+			struct kernel_long_ad extent;
 
 			/*
 			 *  OK, there is not extent covering inode->i_size and
diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h
index 4f86b1d..e58d1de 100644
--- a/fs/udf/udf_i.h
+++ b/fs/udf/udf_i.h
@@ -4,7 +4,7 @@
 struct udf_inode_info {
 	struct timespec		i_crtime;
 	/* Physical address of inode */
-	kernel_lb_addr		i_location;
+	struct kernel_lb_addr		i_location;
 	__u64			i_unique;
 	__u32			i_lenEAttr;
 	__u32			i_lenAlloc;
@@ -17,8 +17,8 @@ struct udf_inode_info {
 	unsigned		i_strat4096 : 1;
 	unsigned		reserved : 26;
 	union {
-		short_ad	*i_sad;
-		long_ad		*i_lad;
+		struct short_ad	*i_sad;
+		struct long_ad		*i_lad;
 		__u8		*i_data;
 	} i_ext;
 	struct inode vfs_inode;
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 1c1c514..d113b72 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -30,6 +30,7 @@
 #define UDF_FLAG_GID_SET	16
 #define UDF_FLAG_SESSION_SET	17
 #define UDF_FLAG_LASTBLOCK_SET	18
+#define UDF_FLAG_BLOCKSIZE_SET	19
 
 #define UDF_PART_FLAG_UNALLOC_BITMAP	0x0001
 #define UDF_PART_FLAG_UNALLOC_TABLE	0x0002
@@ -48,6 +49,8 @@
 #define UDF_SPARABLE_MAP15		0x1522U
 #define UDF_METADATA_MAP25		0x2511U
 
+#define UDF_INVALID_MODE		((mode_t)-1)
+
 #pragma pack(1) /* XXX(hch): Why?  This file just defines in-core structures */
 
 struct udf_meta_data {
@@ -114,7 +117,7 @@ struct udf_sb_info {
 
 	/* Sector headers */
 	__s32			s_session;
-	__u32			s_anchor[3];
+	__u32			s_anchor;
 	__u32			s_last_block;
 
 	struct buffer_head	*s_lvid_bh;
@@ -123,6 +126,8 @@ struct udf_sb_info {
 	mode_t			s_umask;
 	gid_t			s_gid;
 	uid_t			s_uid;
+	mode_t			s_fmode;
+	mode_t			s_dmode;
 
 	/* Root Info */
 	struct timespec		s_record_time;
@@ -143,6 +148,8 @@ struct udf_sb_info {
 	struct inode		*s_vat_inode;
 
 	struct mutex		s_alloc_mutex;
+	/* Protected by s_alloc_mutex */
+	unsigned int		s_lvid_dirty;
 };
 
 static inline struct udf_sb_info *UDF_SB(struct super_block *sb)
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 8ec865d..cac51b7 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -62,10 +62,8 @@ static inline size_t udf_ext0_offset(struct inode *inode)
 		return 0;
 }
 
-#define udf_get_lb_pblock(sb,loc,offset) udf_get_pblock((sb), (loc).logicalBlockNum, (loc).partitionReferenceNum, (offset))
-
 /* computes tag checksum */
-u8 udf_tag_checksum(const tag *t);
+u8 udf_tag_checksum(const struct tag *t);
 
 struct dentry;
 struct inode;
@@ -95,7 +93,7 @@ struct udf_vds_record {
 };
 
 struct generic_desc {
-	tag		descTag;
+	struct tag	descTag;
 	__le32		volDescSeqNum;
 };
 
@@ -108,11 +106,22 @@ struct ustr {
 struct extent_position {
 	struct buffer_head *bh;
 	uint32_t offset;
-	kernel_lb_addr block;
+	struct kernel_lb_addr block;
 };
 
 /* super.c */
 extern void udf_warning(struct super_block *, const char *, const char *, ...);
+/* Set sb->s_dirt and s_lvid_dirty; warn once if the LVID type is not OPEN. */
+static inline void udf_updated_lvid(struct super_block *sb)
+{
+	struct udf_sb_info *sbi = UDF_SB(sb);
+
+	BUG_ON(!sbi->s_lvid_bh);
+	WARN_ON_ONCE(((struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data)
+		     ->integrityType != cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN));
+	sb->s_dirt = 1;
+	sbi->s_lvid_dirty = 1;
+}
 
 /* namei.c */
 extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
@@ -124,7 +133,7 @@ extern int udf_ioctl(struct inode *, struct file *, unsigned int,
 		     unsigned long);
 
 /* inode.c */
-extern struct inode *udf_iget(struct super_block *, kernel_lb_addr);
+extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *);
 extern int udf_sync_inode(struct inode *);
 extern void udf_expand_file_adinicb(struct inode *, int, int *);
 extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *);
@@ -136,19 +145,19 @@ extern void udf_clear_inode(struct inode *);
 extern int udf_write_inode(struct inode *, int);
 extern long udf_block_map(struct inode *, sector_t);
 extern int udf_extend_file(struct inode *, struct extent_position *,
-			   kernel_long_ad *, sector_t);
+			   struct kernel_long_ad *, sector_t);
 extern int8_t inode_bmap(struct inode *, sector_t, struct extent_position *,
-			 kernel_lb_addr *, uint32_t *, sector_t *);
+			 struct kernel_lb_addr *, uint32_t *, sector_t *);
 extern int8_t udf_add_aext(struct inode *, struct extent_position *,
-			   kernel_lb_addr, uint32_t, int);
+			   struct kernel_lb_addr *, uint32_t, int);
 extern int8_t udf_write_aext(struct inode *, struct extent_position *,
-			     kernel_lb_addr, uint32_t, int);
+			     struct kernel_lb_addr *, uint32_t, int);
 extern int8_t udf_delete_aext(struct inode *, struct extent_position,
-			      kernel_lb_addr, uint32_t);
+			      struct kernel_lb_addr, uint32_t);
 extern int8_t udf_next_aext(struct inode *, struct extent_position *,
-			    kernel_lb_addr *, uint32_t *, int);
+			    struct kernel_lb_addr *, uint32_t *, int);
 extern int8_t udf_current_aext(struct inode *, struct extent_position *,
-			       kernel_lb_addr *, uint32_t *, int);
+			       struct kernel_lb_addr *, uint32_t *, int);
 
 /* misc.c */
 extern struct buffer_head *udf_tgetblk(struct super_block *, int);
@@ -160,7 +169,7 @@ extern struct genericFormat *udf_get_extendedattr(struct inode *, uint32_t,
 extern struct buffer_head *udf_read_tagged(struct super_block *, uint32_t,
 					   uint32_t, uint16_t *);
 extern struct buffer_head *udf_read_ptagged(struct super_block *,
-					    kernel_lb_addr, uint32_t,
+					    struct kernel_lb_addr *, uint32_t,
 					    uint16_t *);
 extern void udf_update_tag(char *, int);
 extern void udf_new_tag(char *, uint16_t, uint16_t, uint16_t, uint32_t, int);
@@ -182,6 +191,14 @@ extern uint32_t udf_get_pblock_meta25(struct super_block *, uint32_t, uint16_t,
 					  uint32_t);
 extern int udf_relocate_blocks(struct super_block *, long, long *);
 
+/* Unpack @loc and forward it, with @offset, to udf_get_pblock(). */
+static inline uint32_t udf_get_lb_pblock(struct super_block *sb,
+			struct kernel_lb_addr *loc, uint32_t offset)
+{
+	return udf_get_pblock(sb, loc->logicalBlockNum,
+			      loc->partitionReferenceNum, offset);
+}
+
 /* unicode.c */
 extern int udf_get_filename(struct super_block *, uint8_t *, uint8_t *, int);
 extern int udf_put_filename(struct super_block *, const uint8_t *, uint8_t *,
@@ -200,7 +217,7 @@ extern void udf_truncate_extents(struct inode *);
 
 /* balloc.c */
 extern void udf_free_blocks(struct super_block *, struct inode *,
-			    kernel_lb_addr, uint32_t, uint32_t);
+			    struct kernel_lb_addr *, uint32_t, uint32_t);
 extern int udf_prealloc_blocks(struct super_block *, struct inode *, uint16_t,
 			       uint32_t, uint32_t);
 extern int udf_new_block(struct super_block *, struct inode *, uint16_t,
@@ -214,16 +231,16 @@ extern struct fileIdentDesc *udf_fileident_read(struct inode *, loff_t *,
 						struct udf_fileident_bh *,
 						struct fileIdentDesc *,
 						struct extent_position *,
-						kernel_lb_addr *, uint32_t *,
+						struct kernel_lb_addr *, uint32_t *,
 						sector_t *);
 extern struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize,
 					       int *offset);
-extern long_ad *udf_get_filelongad(uint8_t *, int, uint32_t *, int);
-extern short_ad *udf_get_fileshortad(uint8_t *, int, uint32_t *, int);
+extern struct long_ad *udf_get_filelongad(uint8_t *, int, uint32_t *, int);
+extern struct short_ad *udf_get_fileshortad(uint8_t *, int, uint32_t *, int);
 
 /* udftime.c */
 extern struct timespec *udf_disk_stamp_to_time(struct timespec *dest,
-						timestamp src);
-extern timestamp *udf_time_to_disk_stamp(timestamp *dest, struct timespec src);
+						struct timestamp src);
+extern struct timestamp *udf_time_to_disk_stamp(struct timestamp *dest, struct timespec src);
 
 #endif				/* __UDF_DECL_H */
diff --git a/fs/udf/udfend.h b/fs/udf/udfend.h
index 489f52f..6a9f3a9 100644
--- a/fs/udf/udfend.h
+++ b/fs/udf/udfend.h
@@ -4,9 +4,9 @@
 #include <asm/byteorder.h>
 #include <linux/string.h>
 
-static inline kernel_lb_addr lelb_to_cpu(lb_addr in)
+static inline struct kernel_lb_addr lelb_to_cpu(struct lb_addr in)
 {
-	kernel_lb_addr out;
+	struct kernel_lb_addr out;
 
 	out.logicalBlockNum = le32_to_cpu(in.logicalBlockNum);
 	out.partitionReferenceNum = le16_to_cpu(in.partitionReferenceNum);
@@ -14,9 +14,9 @@ static inline kernel_lb_addr lelb_to_cpu(lb_addr in)
 	return out;
 }
 
-static inline lb_addr cpu_to_lelb(kernel_lb_addr in)
+static inline struct lb_addr cpu_to_lelb(struct kernel_lb_addr in)
 {
-	lb_addr out;
+	struct lb_addr out;
 
 	out.logicalBlockNum = cpu_to_le32(in.logicalBlockNum);
 	out.partitionReferenceNum = cpu_to_le16(in.partitionReferenceNum);
@@ -24,9 +24,9 @@ static inline lb_addr cpu_to_lelb(kernel_lb_addr in)
 	return out;
 }
 
-static inline short_ad lesa_to_cpu(short_ad in)
+static inline struct short_ad lesa_to_cpu(struct short_ad in)
 {
-	short_ad out;
+	struct short_ad out;
 
 	out.extLength = le32_to_cpu(in.extLength);
 	out.extPosition = le32_to_cpu(in.extPosition);
@@ -34,9 +34,9 @@ static inline short_ad lesa_to_cpu(short_ad in)
 	return out;
 }
 
-static inline short_ad cpu_to_lesa(short_ad in)
+static inline struct short_ad cpu_to_lesa(struct short_ad in)
 {
-	short_ad out;
+	struct short_ad out;
 
 	out.extLength = cpu_to_le32(in.extLength);
 	out.extPosition = cpu_to_le32(in.extPosition);
@@ -44,9 +44,9 @@ static inline short_ad cpu_to_lesa(short_ad in)
 	return out;
 }
 
-static inline kernel_long_ad lela_to_cpu(long_ad in)
+static inline struct kernel_long_ad lela_to_cpu(struct long_ad in)
 {
-	kernel_long_ad out;
+	struct kernel_long_ad out;
 
 	out.extLength = le32_to_cpu(in.extLength);
 	out.extLocation = lelb_to_cpu(in.extLocation);
@@ -54,9 +54,9 @@ static inline kernel_long_ad lela_to_cpu(long_ad in)
 	return out;
 }
 
-static inline long_ad cpu_to_lela(kernel_long_ad in)
+static inline struct long_ad cpu_to_lela(struct kernel_long_ad in)
 {
-	long_ad out;
+	struct long_ad out;
 
 	out.extLength = cpu_to_le32(in.extLength);
 	out.extLocation = cpu_to_lelb(in.extLocation);
@@ -64,9 +64,9 @@ static inline long_ad cpu_to_lela(kernel_long_ad in)
 	return out;
 }
 
-static inline kernel_extent_ad leea_to_cpu(extent_ad in)
+static inline struct kernel_extent_ad leea_to_cpu(struct extent_ad in)
 {
-	kernel_extent_ad out;
+	struct kernel_extent_ad out;
 
 	out.extLength = le32_to_cpu(in.extLength);
 	out.extLocation = le32_to_cpu(in.extLocation);
diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c
index 5f811655..b8c828c 100644
--- a/fs/udf/udftime.c
+++ b/fs/udf/udftime.c
@@ -85,7 +85,8 @@ extern struct timezone sys_tz;
 #define SECS_PER_HOUR	(60 * 60)
 #define SECS_PER_DAY	(SECS_PER_HOUR * 24)
 
-struct timespec *udf_disk_stamp_to_time(struct timespec *dest, timestamp src)
+struct timespec *
+udf_disk_stamp_to_time(struct timespec *dest, struct timestamp src)
 {
 	int yday;
 	u16 typeAndTimezone = le16_to_cpu(src.typeAndTimezone);
@@ -116,7 +117,8 @@ struct timespec *udf_disk_stamp_to_time(struct timespec *dest, timestamp src)
 	return dest;
 }
 
-timestamp *udf_time_to_disk_stamp(timestamp *dest, struct timespec ts)
+struct timestamp *
+udf_time_to_disk_stamp(struct timestamp *dest, struct timespec ts)
 {
 	long int days, rem, y;
 	const unsigned short int *ip;
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 9fdf8c9..cefa8c8 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -254,7 +254,7 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
 {
 	const uint8_t *ocu;
 	uint8_t cmp_id, ocu_len;
-	int i;
+	int i, len;
 
 
 	ocu_len = ocu_i->u_len;
@@ -279,8 +279,13 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
 		if (cmp_id == 16)
 			c = (c << 8) | ocu[i++];
 
-		utf_o->u_len += nls->uni2char(c, &utf_o->u_name[utf_o->u_len],
-					      UDF_NAME_LEN - utf_o->u_len);
+		len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len],
+				    UDF_NAME_LEN - utf_o->u_len);
+		/* Valid character? */
+		if (len >= 0)
+			utf_o->u_len += len;
+		else
+			utf_o->u_name[utf_o->u_len++] = '?';
 	}
 	utf_o->u_cmpID = 8;
 
@@ -290,7 +295,8 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
 static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni,
 			int length)
 {
-	unsigned len, i, max_val;
+	int len;
+	unsigned i, max_val;
 	uint16_t uni_char;
 	int u_len;
 
@@ -302,8 +308,13 @@ try_again:
 	u_len = 0U;
 	for (i = 0U; i < uni->u_len; i++) {
 		len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char);
-		if (len <= 0)
+		if (!len)
 			continue;
+		/* Invalid character, deal with it */
+		if (len < 0) {
+			len = 1;
+			uni_char = '?';
+		}
 
 		if (uni_char > max_val) {
 			max_val = 0xffffU;
@@ -324,34 +335,43 @@ try_again:
 int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname,
 		     int flen)
 {
-	struct ustr filename, unifilename;
-	int len;
+	struct ustr *filename, *unifilename;
+	int len = 0;
 
-	if (udf_build_ustr_exact(&unifilename, sname, flen))
+	filename = kmalloc(sizeof(struct ustr), GFP_NOFS);
+	if (!filename)
 		return 0;
 
+	unifilename = kmalloc(sizeof(struct ustr), GFP_NOFS);
+	if (!unifilename)
+		goto out1;
+
+	if (udf_build_ustr_exact(unifilename, sname, flen))
+		goto out2;
+
 	if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
-		if (!udf_CS0toUTF8(&filename, &unifilename)) {
+		if (!udf_CS0toUTF8(filename, unifilename)) {
 			udf_debug("Failed in udf_get_filename: sname = %s\n",
 				  sname);
-			return 0;
+			goto out2;
 		}
 	} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
-		if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, &filename,
-				  &unifilename)) {
+		if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, filename,
+				  unifilename)) {
 			udf_debug("Failed in udf_get_filename: sname = %s\n",
 				  sname);
-			return 0;
+			goto out2;
 		}
 	} else
-		return 0;
-
-	len = udf_translate_to_linux(dname, filename.u_name, filename.u_len,
-				     unifilename.u_name, unifilename.u_len);
-	if (len)
-		return len;
-
-	return 0;
+		goto out2;
+
+	len = udf_translate_to_linux(dname, filename->u_name, filename->u_len,
+				     unifilename->u_name, unifilename->u_len);
+out2:
+	kfree(unifilename);
+out1:
+	kfree(filename);
+	return len;
 }
 
 int udf_put_filename(struct super_block *sb, const uint8_t *sname,
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index c3dc491..60f107e 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -33,6 +33,7 @@ xfs-$(CONFIG_XFS_QUOTA)		+= $(addprefix quota/, \
 				   xfs_qm_syscalls.o \
 				   xfs_qm_bhv.o \
 				   xfs_qm.o)
+xfs-$(CONFIG_XFS_QUOTA)		+= linux-2.6/xfs_quotaops.o
 
 ifeq ($(CONFIG_XFS_QUOTA),y)
 xfs-$(CONFIG_PROC_FS)		+= quota/xfs_qm_stats.o
diff --git a/fs/xfs/linux-2.6/mutex.h b/fs/xfs/linux-2.6/mutex.h
deleted file mode 100644
index 2a88d56..0000000
--- a/fs/xfs/linux-2.6/mutex.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_MUTEX_H__
-#define __XFS_SUPPORT_MUTEX_H__
-
-#include <linux/mutex.h>
-
-typedef struct mutex mutex_t;
-
-#endif /* __XFS_SUPPORT_MUTEX_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index de3a198..c13f673 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1623,4 +1623,5 @@ const struct address_space_operations xfs_address_space_operations = {
 	.bmap			= xfs_vm_bmap,
 	.direct_IO		= xfs_vm_direct_IO,
 	.migratepage		= buffer_migrate_page,
+	.is_partially_uptodate  = block_is_partially_uptodate,
 };
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 4bd1123..d0b4994 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -34,6 +34,7 @@
 #include "xfs_dir2_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_ioctl.h"
 #include "xfs_btree.h"
 #include "xfs_ialloc.h"
 #include "xfs_rtalloc.h"
@@ -78,92 +79,74 @@ xfs_find_handle(
 	int			hsize;
 	xfs_handle_t		handle;
 	struct inode		*inode;
+	struct file		*file = NULL;
+	struct path		path;
+	int			error;
+	struct xfs_inode	*ip;
 
-	memset((char *)&handle, 0, sizeof(handle));
-
-	switch (cmd) {
-	case XFS_IOC_PATH_TO_FSHANDLE:
-	case XFS_IOC_PATH_TO_HANDLE: {
-		struct path path;
-		int error = user_lpath((const char __user *)hreq->path, &path);
+	if (cmd == XFS_IOC_FD_TO_HANDLE) {
+		file = fget(hreq->fd);
+		if (!file)
+			return -EBADF;
+		inode = file->f_path.dentry->d_inode;
+	} else {
+		error = user_lpath((const char __user *)hreq->path, &path);
 		if (error)
 			return error;
-
-		ASSERT(path.dentry);
-		ASSERT(path.dentry->d_inode);
-		inode = igrab(path.dentry->d_inode);
-		path_put(&path);
-		break;
+		inode = path.dentry->d_inode;
 	}
+	ip = XFS_I(inode);
 
-	case XFS_IOC_FD_TO_HANDLE: {
-		struct file	*file;
-
-		file = fget(hreq->fd);
-		if (!file)
-		    return -EBADF;
+	/*
+	 * We can only generate handles for inodes residing on a XFS filesystem,
+	 * and only for regular files, directories or symbolic links.
+	 */
+	error = -EINVAL;
+	if (inode->i_sb->s_magic != XFS_SB_MAGIC)
+		goto out_put;
 
-		ASSERT(file->f_path.dentry);
-		ASSERT(file->f_path.dentry->d_inode);
-		inode = igrab(file->f_path.dentry->d_inode);
-		fput(file);
-		break;
-	}
+	error = -EBADF;
+	if (!S_ISREG(inode->i_mode) &&
+	    !S_ISDIR(inode->i_mode) &&
+	    !S_ISLNK(inode->i_mode))
+		goto out_put;
 
-	default:
-		ASSERT(0);
-		return -XFS_ERROR(EINVAL);
-	}
 
-	if (inode->i_sb->s_magic != XFS_SB_MAGIC) {
-		/* we're not in XFS anymore, Toto */
-		iput(inode);
-		return -XFS_ERROR(EINVAL);
-	}
+	memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t));
 
-	switch (inode->i_mode & S_IFMT) {
-	case S_IFREG:
-	case S_IFDIR:
-	case S_IFLNK:
-		break;
-	default:
-		iput(inode);
-		return -XFS_ERROR(EBADF);
-	}
-
-	/* now we can grab the fsid */
-	memcpy(&handle.ha_fsid, XFS_I(inode)->i_mount->m_fixedfsid,
-			sizeof(xfs_fsid_t));
-	hsize = sizeof(xfs_fsid_t);
-
-	if (cmd != XFS_IOC_PATH_TO_FSHANDLE) {
-		xfs_inode_t	*ip = XFS_I(inode);
+	if (cmd == XFS_IOC_PATH_TO_FSHANDLE) {
+		/*
+		 * This handle only contains an fsid, zero the rest.
+		 */
+		memset(&handle.ha_fid, 0, sizeof(handle.ha_fid));
+		hsize = sizeof(xfs_fsid_t);
+	} else {
 		int		lock_mode;
 
-		/* need to get access to the xfs_inode to read the generation */
 		lock_mode = xfs_ilock_map_shared(ip);
-
-		/* fill in fid section of handle from inode */
 		handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
 					sizeof(handle.ha_fid.fid_len);
 		handle.ha_fid.fid_pad = 0;
 		handle.ha_fid.fid_gen = ip->i_d.di_gen;
 		handle.ha_fid.fid_ino = ip->i_ino;
-
 		xfs_iunlock_map_shared(ip, lock_mode);
 
 		hsize = XFS_HSIZE(handle);
 	}
 
-	/* now copy our handle into the user buffer & write out the size */
+	error = -EFAULT;
 	if (copy_to_user(hreq->ohandle, &handle, hsize) ||
-	    copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) {
-		iput(inode);
-		return -XFS_ERROR(EFAULT);
-	}
+	    copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32)))
+		goto out_put;
 
-	iput(inode);
-	return 0;
+	error = 0;
+
+ out_put:
+	if (cmd == XFS_IOC_FD_TO_HANDLE)
+		fput(file);
+	else
+		path_put(&path);
+	return error;
 }
 
 /*
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 2940612..6075382 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -211,8 +211,13 @@ xfs_vn_mknod(
 	 * Irix uses Missed'em'V split, but doesn't want to see
 	 * the upper 5 bits of (14bit) major.
 	 */
-	if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
-		return -EINVAL;
+	if (S_ISCHR(mode) || S_ISBLK(mode)) {
+		if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
+			return -EINVAL;
+		rdev = sysv_encode_dev(rdev);
+	} else {
+		rdev = 0;
+	}
 
 	if (test_default_acl && test_default_acl(dir)) {
 		if (!_ACL_ALLOC(default_acl)) {
@@ -224,28 +229,11 @@ xfs_vn_mknod(
 		}
 	}
 
-	xfs_dentry_to_name(&name, dentry);
-
 	if (IS_POSIXACL(dir) && !default_acl)
 		mode &= ~current_umask();
 
-	switch (mode & S_IFMT) {
-	case S_IFCHR:
-	case S_IFBLK:
-	case S_IFIFO:
-	case S_IFSOCK:
-		rdev = sysv_encode_dev(rdev);
-	case S_IFREG:
-		error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL);
-		break;
-	case S_IFDIR:
-		error = xfs_mkdir(XFS_I(dir), &name, mode, &ip, NULL);
-		break;
-	default:
-		error = EINVAL;
-		break;
-	}
-
+	xfs_dentry_to_name(&name, dentry);
+	error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL);
 	if (unlikely(error))
 		goto out_free_acl;
 
@@ -553,9 +541,6 @@ xfs_vn_getattr(
 	stat->uid = ip->i_d.di_uid;
 	stat->gid = ip->i_d.di_gid;
 	stat->ino = ip->i_ino;
-#if XFS_BIG_INUMS
-	stat->ino += mp->m_inoadd;
-#endif
 	stat->atime = inode->i_atime;
 	stat->mtime.tv_sec = ip->i_d.di_mtime.t_sec;
 	stat->mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 507492d..f65a53f 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -38,7 +38,6 @@
 #include <kmem.h>
 #include <mrlock.h>
 #include <sv.h>
-#include <mutex.h>
 #include <time.h>
 
 #include <support/ktrace.h>
@@ -51,6 +50,7 @@
 #include <linux/blkdev.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/file.h>
 #include <linux/swap.h>
 #include <linux/errno.h>
@@ -147,17 +147,6 @@
 #define SYNCHRONIZE()	barrier()
 #define __return_address __builtin_return_address(0)
 
-/*
- * IRIX (BSD) quotactl makes use of separate commands for user/group,
- * whereas on Linux the syscall encodes this information into the cmd
- * field (see the QCMD macro in quota.h).  These macros help keep the
- * code portable - they are not visible from the syscall interface.
- */
-#define Q_XSETGQLIM	XQM_CMD(8)	/* set groups disk limits */
-#define Q_XGETGQUOTA	XQM_CMD(9)	/* get groups disk limits */
-#define Q_XSETPQLIM	XQM_CMD(10)	/* set projects disk limits */
-#define Q_XGETPQUOTA	XQM_CMD(11)	/* get projects disk limits */
-
 #define dfltprid	0
 #define MAXPATHLEN	1024
 
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
new file mode 100644
index 0000000..94d9a63
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2008, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_dmapi.h"
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_quota.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "quota/xfs_qm.h"
+#include <linux/quota.h>
+
+
+/* Translate a quota type (USRQUOTA, GRPQUOTA, ...) into an XFS_DQ_* value. */
+STATIC int
+xfs_quota_type(int type)
+{
+	if (type == USRQUOTA)
+		return XFS_DQ_USER;
+	if (type == GRPQUOTA)
+		return XFS_DQ_GROUP;
+
+	/* Every other value is reported as a project quota. */
+	return XFS_DQ_PROJ;
+}
+
+STATIC int
+xfs_fs_quota_sync(
+	struct super_block	*sb,
+	int			type)
+{
+	struct xfs_mount	*mount = XFS_M(sb);
+
+	/* -ENOSYS unless quota is running; @type is not consulted. */
+	return XFS_IS_QUOTA_RUNNING(mount) ?
+		-xfs_sync_inodes(mount, SYNC_DELWRI) : -ENOSYS;
+}
+
+STATIC int
+xfs_fs_get_xstate(
+	struct super_block	*sb,
+	struct fs_quota_stat	*fqs)
+{
+	struct xfs_mount	*mount = XFS_M(sb);
+
+	/* Pass @fqs to xfs_qm_scall_getqstat(); its result is negated. */
+	return XFS_IS_QUOTA_RUNNING(mount) ?
+		-xfs_qm_scall_getqstat(mount, fqs) : -ENOSYS;
+}
+
+STATIC int
+xfs_fs_set_xstate(
+	struct super_block	*sb,
+	unsigned int		uflags,
+	int			op)
+{
+	struct xfs_mount	*mount = XFS_M(sb);
+	unsigned int		qflags = 0;
+
+	/* Checked in this order: read-only mount, quota running, capability. */
+	if (sb->s_flags & MS_RDONLY)
+		return -EROFS;
+	if (!XFS_IS_QUOTA_RUNNING(mount))
+		return -ENOSYS;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* Convert the XFS_QUOTA_* request bits into XFS_*QUOTA_* flag bits. */
+	qflags |= (uflags & XFS_QUOTA_UDQ_ACCT) ? XFS_UQUOTA_ACCT : 0;
+	qflags |= (uflags & XFS_QUOTA_PDQ_ACCT) ? XFS_PQUOTA_ACCT : 0;
+	qflags |= (uflags & XFS_QUOTA_GDQ_ACCT) ? XFS_GQUOTA_ACCT : 0;
+	qflags |= (uflags & XFS_QUOTA_UDQ_ENFD) ? XFS_UQUOTA_ENFD : 0;
+	/* Project and group enforcement both map to XFS_OQUOTA_ENFD. */
+	if (uflags & (XFS_QUOTA_PDQ_ENFD | XFS_QUOTA_GDQ_ENFD))
+		qflags |= XFS_OQUOTA_ENFD;
+
+	if (op == Q_XQUOTAON)
+		return -xfs_qm_scall_quotaon(mount, qflags);
+	if (op == Q_XQUOTAOFF) {
+		if (!XFS_IS_QUOTA_ON(mount))
+			return -EINVAL;
+		return -xfs_qm_scall_quotaoff(mount, qflags);
+	}
+	if (op == Q_XQUOTARM) {
+		if (XFS_IS_QUOTA_ON(mount))
+			return -EINVAL;
+		return -xfs_qm_scall_trunc_qfiles(mount, qflags);
+	}
+
+	/* Any other @op is rejected. */
+	return -EINVAL;
+}
+
+STATIC int
+xfs_fs_get_xquota(
+	struct super_block	*sb,
+	int			type,
+	qid_t			id,
+	struct fs_disk_quota	*fdq)
+{
+	struct xfs_mount	*mount = XFS_M(sb);
+
+	/* -ENOSYS when quota is not running, -ESRCH when it is not on. */
+	if (!XFS_IS_QUOTA_RUNNING(mount))
+		return -ENOSYS;
+	return XFS_IS_QUOTA_ON(mount) ?
+		-xfs_qm_scall_getquota(mount, id, xfs_quota_type(type), fdq) :
+		-ESRCH;
+}
+
+STATIC int
+xfs_fs_set_xquota(
+	struct super_block	*sb,
+	int			type,
+	qid_t			id,
+	struct fs_disk_quota	*fdq)
+{
+	struct xfs_mount	*mount = XFS_M(sb);
+
+	/* Check order decides the errno: read-only, running, on, capability. */
+	if (sb->s_flags & MS_RDONLY)
+		return -EROFS;
+	if (!XFS_IS_QUOTA_RUNNING(mount))
+		return -ENOSYS;
+	if (!XFS_IS_QUOTA_ON(mount))
+		return -ESRCH;
+	return capable(CAP_SYS_ADMIN) ?
+		-xfs_qm_scall_setqlim(mount, id, xfs_quota_type(type), fdq) :
+		-EPERM;
+}
+
+struct quotactl_ops xfs_quotactl_operations = {	/* handlers defined above */
+	.quota_sync		= xfs_fs_quota_sync,
+	.get_xstate		= xfs_fs_get_xstate,
+	.set_xstate		= xfs_fs_set_xstate,
+	.get_xquota		= xfs_fs_get_xquota,
+	.set_xquota		= xfs_fs_set_xquota,
+};
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 32ae502..bb68526 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -68,7 +68,6 @@
 #include <linux/freezer.h>
 #include <linux/parser.h>
 
-static struct quotactl_ops xfs_quotactl_operations;
 static struct super_operations xfs_super_operations;
 static kmem_zone_t *xfs_ioend_zone;
 mempool_t *xfs_ioend_pool;
@@ -79,7 +78,6 @@ mempool_t *xfs_ioend_pool;
 #define MNTOPT_RTDEV	"rtdev"		/* realtime I/O device */
 #define MNTOPT_BIOSIZE	"biosize"	/* log2 of preferred buffered io size */
 #define MNTOPT_WSYNC	"wsync"		/* safe-mode nfs compatible mount */
-#define MNTOPT_INO64	"ino64"		/* force inodes into 64-bit range */
 #define MNTOPT_NOALIGN	"noalign"	/* turn off stripe alignment */
 #define MNTOPT_SWALLOC	"swalloc"	/* turn on stripe width allocation */
 #define MNTOPT_SUNIT	"sunit"		/* data volume stripe unit */
@@ -180,7 +178,7 @@ xfs_parseargs(
 	int			dswidth = 0;
 	int			iosize = 0;
 	int			dmapi_implies_ikeep = 1;
-	uchar_t			iosizelog = 0;
+	__uint8_t		iosizelog = 0;
 
 	/*
 	 * Copy binary VFS mount flags we are interested in.
@@ -291,16 +289,6 @@ xfs_parseargs(
 			mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
 		} else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
 			mp->m_flags |= XFS_MOUNT_NORECOVERY;
-		} else if (!strcmp(this_char, MNTOPT_INO64)) {
-#if XFS_BIG_INUMS
-			mp->m_flags |= XFS_MOUNT_INO64;
-			mp->m_inoadd = XFS_INO64_OFFSET;
-#else
-			cmn_err(CE_WARN,
-				"XFS: %s option not allowed on this system",
-				this_char);
-			return EINVAL;
-#endif
 		} else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
 			mp->m_flags |= XFS_MOUNT_NOALIGN;
 		} else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
@@ -529,7 +517,6 @@ xfs_showargs(
 		/* the few simple ones we can get from the mount struct */
 		{ XFS_MOUNT_IKEEP,		"," MNTOPT_IKEEP },
 		{ XFS_MOUNT_WSYNC,		"," MNTOPT_WSYNC },
-		{ XFS_MOUNT_INO64,		"," MNTOPT_INO64 },
 		{ XFS_MOUNT_NOALIGN,		"," MNTOPT_NOALIGN },
 		{ XFS_MOUNT_SWALLOC,		"," MNTOPT_SWALLOC },
 		{ XFS_MOUNT_NOUUID,		"," MNTOPT_NOUUID },
@@ -634,7 +621,7 @@ xfs_max_file_offset(
 	return (((__uint64_t)pagefactor) << bitshift) - 1;
 }
 
-int
+STATIC int
 xfs_blkdev_get(
 	xfs_mount_t		*mp,
 	const char		*name,
@@ -651,7 +638,7 @@ xfs_blkdev_get(
 	return -error;
 }
 
-void
+STATIC void
 xfs_blkdev_put(
 	struct block_device	*bdev)
 {
@@ -872,7 +859,7 @@ xfsaild_wakeup(
 	wake_up_process(ailp->xa_task);
 }
 
-int
+STATIC int
 xfsaild(
 	void	*data)
 {
@@ -990,26 +977,57 @@ xfs_fs_write_inode(
 	int			sync)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
 	int			error = 0;
-	int			flags = 0;
 
 	xfs_itrace_entry(ip);
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -XFS_ERROR(EIO);
+
 	if (sync) {
 		error = xfs_wait_on_pages(ip, 0, -1);
 		if (error)
-			goto out_error;
-		flags |= FLUSH_SYNC;
+			goto out;
 	}
-	error = xfs_inode_flush(ip, flags);
 
-out_error:
+	/*
+	 * Bypass inodes which have already been cleaned by
+	 * the inode flush clustering code inside xfs_iflush
+	 */
+	if (xfs_inode_clean(ip))
+		goto out;
+
+	/*
+	 * We make this non-blocking if the inode is contended and return
+	 * EAGAIN to tell the caller that the flush did not succeed.  This
+	 * prevents the flush path from blocking on inodes that are inside
+	 * another operation right now; they get caught later by xfs_sync.
+	 */
+	if (sync) {
+		xfs_ilock(ip, XFS_ILOCK_SHARED);
+		xfs_iflock(ip);
+
+		error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
+	} else {
+		error = EAGAIN;
+		if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
+			goto out;
+		if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
+			goto out_unlock;
+
+		error = xfs_iflush(ip, XFS_IFLUSH_ASYNC_NOBLOCK);
+	}
+
+ out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ out:
 	/*
 	 * if we failed to write out the inode then mark
 	 * it dirty again so we'll try again later.
 	 */
 	if (error)
 		xfs_mark_inode_dirty_sync(ip);
-
 	return -error;
 }
 
@@ -1169,18 +1187,12 @@ xfs_fs_statfs(
 	statp->f_bfree = statp->f_bavail =
 				sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
 	fakeinos = statp->f_bfree << sbp->sb_inopblog;
-#if XFS_BIG_INUMS
-	fakeinos += mp->m_inoadd;
-#endif
 	statp->f_files =
 	    MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
 	if (mp->m_maxicount)
-#if XFS_BIG_INUMS
-		if (!mp->m_inoadd)
-#endif
-			statp->f_files = min_t(typeof(statp->f_files),
-						statp->f_files,
-						mp->m_maxicount);
+		statp->f_files = min_t(typeof(statp->f_files),
+					statp->f_files,
+					mp->m_maxicount);
 	statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
 	spin_unlock(&mp->m_sb_lock);
 
@@ -1302,57 +1314,6 @@ xfs_fs_show_options(
 	return -xfs_showargs(XFS_M(mnt->mnt_sb), m);
 }
 
-STATIC int
-xfs_fs_quotasync(
-	struct super_block	*sb,
-	int			type)
-{
-	return -XFS_QM_QUOTACTL(XFS_M(sb), Q_XQUOTASYNC, 0, NULL);
-}
-
-STATIC int
-xfs_fs_getxstate(
-	struct super_block	*sb,
-	struct fs_quota_stat	*fqs)
-{
-	return -XFS_QM_QUOTACTL(XFS_M(sb), Q_XGETQSTAT, 0, (caddr_t)fqs);
-}
-
-STATIC int
-xfs_fs_setxstate(
-	struct super_block	*sb,
-	unsigned int		flags,
-	int			op)
-{
-	return -XFS_QM_QUOTACTL(XFS_M(sb), op, 0, (caddr_t)&flags);
-}
-
-STATIC int
-xfs_fs_getxquota(
-	struct super_block	*sb,
-	int			type,
-	qid_t			id,
-	struct fs_disk_quota	*fdq)
-{
-	return -XFS_QM_QUOTACTL(XFS_M(sb),
-				 (type == USRQUOTA) ? Q_XGETQUOTA :
-				  ((type == GRPQUOTA) ? Q_XGETGQUOTA :
-				   Q_XGETPQUOTA), id, (caddr_t)fdq);
-}
-
-STATIC int
-xfs_fs_setxquota(
-	struct super_block	*sb,
-	int			type,
-	qid_t			id,
-	struct fs_disk_quota	*fdq)
-{
-	return -XFS_QM_QUOTACTL(XFS_M(sb),
-				 (type == USRQUOTA) ? Q_XSETQLIM :
-				  ((type == GRPQUOTA) ? Q_XSETGQLIM :
-				   Q_XSETPQLIM), id, (caddr_t)fdq);
-}
-
 /*
  * This function fills in xfs_mount_t fields based on mount args.
  * Note: the superblock _has_ now been read in.
@@ -1435,7 +1396,9 @@ xfs_fs_fill_super(
 	sb_min_blocksize(sb, BBSIZE);
 	sb->s_xattr = xfs_xattr_handlers;
 	sb->s_export_op = &xfs_export_operations;
+#ifdef CONFIG_XFS_QUOTA
 	sb->s_qcop = &xfs_quotactl_operations;
+#endif
 	sb->s_op = &xfs_super_operations;
 
 	error = xfs_dmops_get(mp);
@@ -1578,14 +1541,6 @@ static struct super_operations xfs_super_operations = {
 	.show_options		= xfs_fs_show_options,
 };
 
-static struct quotactl_ops xfs_quotactl_operations = {
-	.quota_sync		= xfs_fs_quotasync,
-	.get_xstate		= xfs_fs_getxstate,
-	.set_xstate		= xfs_fs_setxstate,
-	.get_xquota		= xfs_fs_getxquota,
-	.set_xquota		= xfs_fs_setxquota,
-};
-
 static struct file_system_type xfs_fs_type = {
 	.owner			= THIS_MODULE,
 	.name			= "xfs",
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index d5d776d..5a2ea3a 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -93,6 +93,7 @@ extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
 
 extern const struct export_operations xfs_export_operations;
 extern struct xattr_handler *xfs_xattr_handlers[];
+extern struct quotactl_ops xfs_quotactl_operations;
 
 #define XFS_M(sb)		((struct xfs_mount *)((sb)->s_fs_info))
 
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 5f6de1e..04f058c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -19,6 +19,7 @@
 #define XFS_SYNC_H 1
 
 struct xfs_mount;
+struct xfs_perag;
 
 typedef struct bhv_vfs_sync_work {
 	struct list_head	w_list;
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index f65983a..ad7fbea 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -41,11 +41,6 @@ struct attrlist_cursor_kern;
 #define IO_INVIS	0x00020		/* don't update inode timestamps */
 
 /*
- * Flags for xfs_inode_flush
- */
-#define FLUSH_SYNC		1	/* wait for flush to complete	*/
-
-/*
  * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
  */
 #define FI_NONE			0	/* none */
@@ -55,33 +50,6 @@ struct attrlist_cursor_kern;
 					   the operation completes. */
 
 /*
- * Dealing with bad inodes
- */
-static inline int VN_BAD(struct inode *vp)
-{
-	return is_bad_inode(vp);
-}
-
-/*
- * Extracting atime values in various formats
- */
-static inline void vn_atime_to_bstime(struct inode *vp, xfs_bstime_t *bs_atime)
-{
-	bs_atime->tv_sec = vp->i_atime.tv_sec;
-	bs_atime->tv_nsec = vp->i_atime.tv_nsec;
-}
-
-static inline void vn_atime_to_timespec(struct inode *vp, struct timespec *ts)
-{
-	*ts = vp->i_atime;
-}
-
-static inline void vn_atime_to_time_t(struct inode *vp, time_t *tt)
-{
-	*tt = vp->i_atime.tv_sec;
-}
-
-/*
  * Some useful predicates.
  */
 #define VN_MAPPED(vp)	mapping_mapped(vp->i_mapping)
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 6543c0b..e4babcc 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -804,7 +804,7 @@ xfs_qm_dqlookup(
 	uint			flist_locked;
 	xfs_dquot_t		*d;
 
-	ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
+	ASSERT(mutex_is_locked(&qh->qh_lock));
 
 	flist_locked = B_FALSE;
 
@@ -877,7 +877,7 @@ xfs_qm_dqlookup(
 			/*
 			 * move the dquot to the front of the hashchain
 			 */
-			ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
+			ASSERT(mutex_is_locked(&qh->qh_lock));
 			if (dqp->HL_PREVP != &qh->qh_next) {
 				xfs_dqtrace_entry(dqp,
 						  "DQLOOKUP: HASH MOVETOFRONT");
@@ -892,13 +892,13 @@ xfs_qm_dqlookup(
 			}
 			xfs_dqtrace_entry(dqp, "LOOKUP END");
 			*O_dqpp = dqp;
-			ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
+			ASSERT(mutex_is_locked(&qh->qh_lock));
 			return (0);
 		}
 	}
 
 	*O_dqpp = NULL;
-	ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
+	ASSERT(mutex_is_locked(&qh->qh_lock));
 	return (1);
 }
 
@@ -956,7 +956,7 @@ xfs_qm_dqget(
 			ASSERT(ip->i_gdquot == NULL);
 	}
 #endif
-	XFS_DQ_HASH_LOCK(h);
+	mutex_lock(&h->qh_lock);
 
 	/*
 	 * Look in the cache (hashtable).
@@ -971,7 +971,7 @@ xfs_qm_dqget(
 		 */
 		ASSERT(*O_dqpp);
 		ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
-		XFS_DQ_HASH_UNLOCK(h);
+		mutex_unlock(&h->qh_lock);
 		xfs_dqtrace_entry(*O_dqpp, "DQGET DONE (FROM CACHE)");
 		return (0);	/* success */
 	}
@@ -991,7 +991,7 @@ xfs_qm_dqget(
 	 * we don't keep the lock across a disk read
 	 */
 	version = h->qh_version;
-	XFS_DQ_HASH_UNLOCK(h);
+	mutex_unlock(&h->qh_lock);
 
 	/*
 	 * Allocate the dquot on the kernel heap, and read the ondisk
@@ -1056,7 +1056,7 @@ xfs_qm_dqget(
 	/*
 	 * Hashlock comes after ilock in lock order
 	 */
-	XFS_DQ_HASH_LOCK(h);
+	mutex_lock(&h->qh_lock);
 	if (version != h->qh_version) {
 		xfs_dquot_t *tmpdqp;
 		/*
@@ -1072,7 +1072,7 @@ xfs_qm_dqget(
 			 * and start over.
 			 */
 			xfs_qm_dqput(tmpdqp);
-			XFS_DQ_HASH_UNLOCK(h);
+			mutex_unlock(&h->qh_lock);
 			xfs_qm_dqdestroy(dqp);
 			XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
 			goto again;
@@ -1083,7 +1083,7 @@ xfs_qm_dqget(
 	 * Put the dquot at the beginning of the hash-chain and mp's list
 	 * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
 	 */
-	ASSERT(XFS_DQ_IS_HASH_LOCKED(h));
+	ASSERT(mutex_is_locked(&h->qh_lock));
 	dqp->q_hash = h;
 	XQM_HASHLIST_INSERT(h, dqp);
 
@@ -1102,7 +1102,7 @@ xfs_qm_dqget(
 	XQM_MPLIST_INSERT(&(XFS_QI_MPL_LIST(mp)), dqp);
 
 	xfs_qm_mplist_unlock(mp);
-	XFS_DQ_HASH_UNLOCK(h);
+	mutex_unlock(&h->qh_lock);
  dqret:
 	ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
 	xfs_dqtrace_entry(dqp, "DQGET DONE");
@@ -1440,7 +1440,7 @@ xfs_qm_dqpurge(
 	xfs_mount_t	*mp = dqp->q_mount;
 
 	ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
-	ASSERT(XFS_DQ_IS_HASH_LOCKED(dqp->q_hash));
+	ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
 
 	xfs_dqlock(dqp);
 	/*
@@ -1453,7 +1453,7 @@ xfs_qm_dqpurge(
 	 */
 	if (dqp->q_nrefs != 0) {
 		xfs_dqunlock(dqp);
-		XFS_DQ_HASH_UNLOCK(dqp->q_hash);
+		mutex_unlock(&dqp->q_hash->qh_lock);
 		return (1);
 	}
 
@@ -1517,7 +1517,7 @@ xfs_qm_dqpurge(
 	memset(&dqp->q_core, 0, sizeof(dqp->q_core));
 	xfs_dqfunlock(dqp);
 	xfs_dqunlock(dqp);
-	XFS_DQ_HASH_UNLOCK(thishash);
+	mutex_unlock(&thishash->qh_lock);
 	return (0);
 }
 
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index d443e93..de0f402 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -34,7 +34,7 @@
  */
 typedef struct xfs_dqhash {
 	struct xfs_dquot *qh_next;
-	mutex_t		  qh_lock;
+	struct mutex	  qh_lock;
 	uint		  qh_version;	/* ever increasing version */
 	uint		  qh_nelems;	/* number of dquots on the list */
 } xfs_dqhash_t;
@@ -81,7 +81,7 @@ typedef struct xfs_dquot {
 	xfs_qcnt_t	 q_res_bcount;	/* total regular nblks used+reserved */
 	xfs_qcnt_t	 q_res_icount;	/* total inos allocd+reserved */
 	xfs_qcnt_t	 q_res_rtbcount;/* total realtime blks used+reserved */
-	mutex_t		 q_qlock;	/* quota lock */
+	struct mutex	 q_qlock;	/* quota lock */
 	struct completion q_flush;	/* flush completion queue */
 	atomic_t          q_pincount;	/* dquot pin count */
 	wait_queue_head_t q_pinwait;	/* dquot pinning wait queue */
@@ -109,19 +109,6 @@ enum {
 
 #define XFS_DQHOLD(dqp)		((dqp)->q_nrefs++)
 
-#ifdef DEBUG
-static inline int
-XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
-{
-	if (mutex_trylock(&dqp->q_qlock)) {
-		mutex_unlock(&dqp->q_qlock);
-		return 0;
-	}
-	return 1;
-}
-#endif
-
-
 /*
  * Manage the q_flush completion queue embedded in the dquot.  This completion
  * queue synchronizes processes attempting to flush the in-core dquot back to
@@ -142,6 +129,7 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
 	complete(&dqp->q_flush);
 }
 
+#define XFS_DQ_IS_LOCKED(dqp)	(mutex_is_locked(&((dqp)->q_qlock)))
 #define XFS_DQ_IS_ON_FREELIST(dqp)  ((dqp)->dq_flnext != (dqp))
 #define XFS_DQ_IS_DIRTY(dqp)	((dqp)->dq_flags & XFS_DQ_DIRTY)
 #define XFS_QM_ISUDQ(dqp)	((dqp)->dq_flags & XFS_DQ_USER)
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 7a2beb6..5b66950 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -55,7 +55,7 @@
  * quota functionality, including maintaining the freelist and hash
  * tables of dquots.
  */
-mutex_t		xfs_Gqm_lock;
+struct mutex	xfs_Gqm_lock;
 struct xfs_qm	*xfs_Gqm;
 uint		ndquot;
 
@@ -69,8 +69,6 @@ STATIC void	xfs_qm_list_destroy(xfs_dqlist_t *);
 
 STATIC void	xfs_qm_freelist_init(xfs_frlist_t *);
 STATIC void	xfs_qm_freelist_destroy(xfs_frlist_t *);
-STATIC int	xfs_qm_mplist_nowait(xfs_mount_t *);
-STATIC int	xfs_qm_dqhashlock_nowait(xfs_dquot_t *);
 
 STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);
 STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
@@ -82,7 +80,7 @@ static struct shrinker xfs_qm_shaker = {
 };
 
 #ifdef DEBUG
-extern mutex_t	qcheck_lock;
+extern struct mutex	qcheck_lock;
 #endif
 
 #ifdef QUOTADEBUG
@@ -219,7 +217,7 @@ xfs_qm_hold_quotafs_ref(
 	 * the structure could disappear between the entry to this routine and
 	 * a HOLD operation if not locked.
 	 */
-	XFS_QM_LOCK(xfs_Gqm);
+	mutex_lock(&xfs_Gqm_lock);
 
 	if (xfs_Gqm == NULL)
 		xfs_Gqm = xfs_Gqm_init();
@@ -228,8 +226,8 @@ xfs_qm_hold_quotafs_ref(
 	 * debugging and statistical purposes, but ...
 	 * Just take a reference and get out.
 	 */
-	XFS_QM_HOLD(xfs_Gqm);
-	XFS_QM_UNLOCK(xfs_Gqm);
+	xfs_Gqm->qm_nrefs++;
+	mutex_unlock(&xfs_Gqm_lock);
 
 	return 0;
 }
@@ -277,13 +275,12 @@ xfs_qm_rele_quotafs_ref(
 	 * Destroy the entire XQM. If somebody mounts with quotaon, this'll
 	 * be restarted.
 	 */
-	XFS_QM_LOCK(xfs_Gqm);
-	XFS_QM_RELE(xfs_Gqm);
-	if (xfs_Gqm->qm_nrefs == 0) {
+	mutex_lock(&xfs_Gqm_lock);
+	if (--xfs_Gqm->qm_nrefs == 0) {
 		xfs_qm_destroy(xfs_Gqm);
 		xfs_Gqm = NULL;
 	}
-	XFS_QM_UNLOCK(xfs_Gqm);
+	mutex_unlock(&xfs_Gqm_lock);
 }
 
 /*
@@ -577,10 +574,10 @@ xfs_qm_dqpurge_int(
 			continue;
 		}
 
-		if (! xfs_qm_dqhashlock_nowait(dqp)) {
+		if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
 			nrecl = XFS_QI_MPLRECLAIMS(mp);
 			xfs_qm_mplist_unlock(mp);
-			XFS_DQ_HASH_LOCK(dqp->q_hash);
+			mutex_lock(&dqp->q_hash->qh_lock);
 			xfs_qm_mplist_lock(mp);
 
 			/*
@@ -590,7 +587,7 @@ xfs_qm_dqpurge_int(
 			 * this point, but somebody might be taking things off.
 			 */
 			if (nrecl != XFS_QI_MPLRECLAIMS(mp)) {
-				XFS_DQ_HASH_UNLOCK(dqp->q_hash);
+				mutex_unlock(&dqp->q_hash->qh_lock);
 				goto again;
 			}
 		}
@@ -632,7 +629,6 @@ xfs_qm_dqattach_one(
 	xfs_dqid_t	id,
 	uint		type,
 	uint		doalloc,
-	uint		dolock,
 	xfs_dquot_t	*udqhint, /* hint */
 	xfs_dquot_t	**IO_idqpp)
 {
@@ -641,16 +637,16 @@ xfs_qm_dqattach_one(
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 	error = 0;
+
 	/*
 	 * See if we already have it in the inode itself. IO_idqpp is
 	 * &i_udquot or &i_gdquot. This made the code look weird, but
 	 * made the logic a lot simpler.
 	 */
-	if ((dqp = *IO_idqpp)) {
-		if (dolock)
-			xfs_dqlock(dqp);
+	dqp = *IO_idqpp;
+	if (dqp) {
 		xfs_dqtrace_entry(dqp, "DQATTACH: found in ip");
-		goto done;
+		return 0;
 	}
 
 	/*
@@ -659,38 +655,38 @@ xfs_qm_dqattach_one(
 	 * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
 	 * the user dquot.
 	 */
-	ASSERT(!udqhint || type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
-	if (udqhint && !dolock)
+	if (udqhint) {
+		ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
 		xfs_dqlock(udqhint);
 
-	/*
-	 * No need to take dqlock to look at the id.
-	 * The ID can't change until it gets reclaimed, and it won't
-	 * be reclaimed as long as we have a ref from inode and we hold
-	 * the ilock.
-	 */
-	if (udqhint &&
-	    (dqp = udqhint->q_gdquot) &&
-	    (be32_to_cpu(dqp->q_core.d_id) == id)) {
-		ASSERT(XFS_DQ_IS_LOCKED(udqhint));
-		xfs_dqlock(dqp);
-		XFS_DQHOLD(dqp);
-		ASSERT(*IO_idqpp == NULL);
-		*IO_idqpp = dqp;
-		if (!dolock) {
+		/*
+		 * No need to take dqlock to look at the id.
+		 *
+		 * The ID can't change until it gets reclaimed, and it won't
+		 * be reclaimed as long as we have a ref from inode and we
+		 * hold the ilock.
+		 */
+		dqp = udqhint->q_gdquot;
+		if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
+			xfs_dqlock(dqp);
+			XFS_DQHOLD(dqp);
+			ASSERT(*IO_idqpp == NULL);
+			*IO_idqpp = dqp;
+
 			xfs_dqunlock(dqp);
 			xfs_dqunlock(udqhint);
+			return 0;
 		}
-		goto done;
-	}
-	/*
-	 * We can't hold a dquot lock when we call the dqget code.
-	 * We'll deadlock in no time, because of (not conforming to)
-	 * lock ordering - the inodelock comes before any dquot lock,
-	 * and we may drop and reacquire the ilock in xfs_qm_dqget().
-	 */
-	if (udqhint)
+
+		/*
+		 * We can't hold a dquot lock when we call the dqget code.
+		 * We'll deadlock in no time, because of (not conforming to)
+		 * lock ordering - the inodelock comes before any dquot lock,
+		 * and we may drop and reacquire the ilock in xfs_qm_dqget().
+		 */
 		xfs_dqunlock(udqhint);
+	}
+
 	/*
 	 * Find the dquot from somewhere. This bumps the
 	 * reference count of dquot and returns it locked.
@@ -698,48 +694,19 @@ xfs_qm_dqattach_one(
 	 * disk and we didn't ask it to allocate;
 	 * ESRCH if quotas got turned off suddenly.
 	 */
-	if ((error = xfs_qm_dqget(ip->i_mount, ip, id, type,
-				 doalloc|XFS_QMOPT_DOWARN, &dqp))) {
-		if (udqhint && dolock)
-			xfs_dqlock(udqhint);
-		goto done;
-	}
+	error = xfs_qm_dqget(ip->i_mount, ip, id, type, doalloc | XFS_QMOPT_DOWARN, &dqp);
+	if (error)
+		return error;
 
 	xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget");
+
 	/*
 	 * dqget may have dropped and re-acquired the ilock, but it guarantees
 	 * that the dquot returned is the one that should go in the inode.
 	 */
 	*IO_idqpp = dqp;
-	ASSERT(dqp);
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	if (! dolock) {
-		xfs_dqunlock(dqp);
-		goto done;
-	}
-	if (! udqhint)
-		goto done;
-
-	ASSERT(udqhint);
-	ASSERT(dolock);
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	if (! xfs_qm_dqlock_nowait(udqhint)) {
-		xfs_dqunlock(dqp);
-		xfs_dqlock(udqhint);
-		xfs_dqlock(dqp);
-	}
-      done:
-#ifdef QUOTADEBUG
-	if (udqhint) {
-		if (dolock)
-			ASSERT(XFS_DQ_IS_LOCKED(udqhint));
-	}
-	if (! error) {
-		if (dolock)
-			ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	}
-#endif
-	return error;
+	xfs_dqunlock(dqp);
+	return 0;
 }
 
 
@@ -754,24 +721,15 @@ xfs_qm_dqattach_one(
 STATIC void
 xfs_qm_dqattach_grouphint(
 	xfs_dquot_t	*udq,
-	xfs_dquot_t	*gdq,
-	uint		locked)
+	xfs_dquot_t	*gdq)
 {
 	xfs_dquot_t	*tmp;
 
-#ifdef QUOTADEBUG
-	if (locked) {
-		ASSERT(XFS_DQ_IS_LOCKED(udq));
-		ASSERT(XFS_DQ_IS_LOCKED(gdq));
-	}
-#endif
-	if (! locked)
-		xfs_dqlock(udq);
+	xfs_dqlock(udq);
 
 	if ((tmp = udq->q_gdquot)) {
 		if (tmp == gdq) {
-			if (! locked)
-				xfs_dqunlock(udq);
+			xfs_dqunlock(udq);
 			return;
 		}
 
@@ -781,8 +739,6 @@ xfs_qm_dqattach_grouphint(
 		 * because the freelist lock comes before dqlocks.
 		 */
 		xfs_dqunlock(udq);
-		if (locked)
-			xfs_dqunlock(gdq);
 		/*
 		 * we took a hard reference once upon a time in dqget,
 		 * so give it back when the udquot no longer points at it
@@ -795,9 +751,7 @@ xfs_qm_dqattach_grouphint(
 
 	} else {
 		ASSERT(XFS_DQ_IS_LOCKED(udq));
-		if (! locked) {
-			xfs_dqlock(gdq);
-		}
+		xfs_dqlock(gdq);
 	}
 
 	ASSERT(XFS_DQ_IS_LOCKED(udq));
@@ -810,10 +764,9 @@ xfs_qm_dqattach_grouphint(
 		XFS_DQHOLD(gdq);
 		udq->q_gdquot = gdq;
 	}
-	if (! locked) {
-		xfs_dqunlock(gdq);
-		xfs_dqunlock(udq);
-	}
+
+	xfs_dqunlock(gdq);
+	xfs_dqunlock(udq);
 }
 
 
@@ -821,8 +774,6 @@ xfs_qm_dqattach_grouphint(
  * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
  * into account.
  * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
- * If XFS_QMOPT_DQLOCK, the dquot(s) will be returned locked. This option pretty
- * much made this code a complete mess, but it has been pretty useful.
  * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL.
  * Inode may get unlocked and relocked in here, and the caller must deal with
  * the consequences.
@@ -851,7 +802,6 @@ xfs_qm_dqattach(
 	if (XFS_IS_UQUOTA_ON(mp)) {
 		error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
 						flags & XFS_QMOPT_DQALLOC,
-						flags & XFS_QMOPT_DQLOCK,
 						NULL, &ip->i_udquot);
 		if (error)
 			goto done;
@@ -863,11 +813,9 @@ xfs_qm_dqattach(
 		error = XFS_IS_GQUOTA_ON(mp) ?
 			xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
 						flags & XFS_QMOPT_DQALLOC,
-						flags & XFS_QMOPT_DQLOCK,
 						ip->i_udquot, &ip->i_gdquot) :
 			xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ,
 						flags & XFS_QMOPT_DQALLOC,
-						flags & XFS_QMOPT_DQLOCK,
 						ip->i_udquot, &ip->i_gdquot);
 		/*
 		 * Don't worry about the udquot that we may have
@@ -898,22 +846,13 @@ xfs_qm_dqattach(
 		/*
 		 * Attach i_gdquot to the gdquot hint inside the i_udquot.
 		 */
-		xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot,
-					 flags & XFS_QMOPT_DQLOCK);
+		xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
 	}
 
       done:
 
 #ifdef QUOTADEBUG
 	if (! error) {
-		if (ip->i_udquot) {
-			if (flags & XFS_QMOPT_DQLOCK)
-				ASSERT(XFS_DQ_IS_LOCKED(ip->i_udquot));
-		}
-		if (ip->i_gdquot) {
-			if (flags & XFS_QMOPT_DQLOCK)
-				ASSERT(XFS_DQ_IS_LOCKED(ip->i_gdquot));
-		}
 		if (XFS_IS_UQUOTA_ON(mp))
 			ASSERT(ip->i_udquot);
 		if (XFS_IS_OQUOTA_ON(mp))
@@ -2086,7 +2025,7 @@ xfs_qm_shake_freelist(
 		 * a dqlookup process that holds the hashlock that is
 		 * waiting for the freelist lock.
 		 */
-		if (! xfs_qm_dqhashlock_nowait(dqp)) {
+		if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
 			xfs_dqfunlock(dqp);
 			xfs_dqunlock(dqp);
 			dqp = dqp->dq_flnext;
@@ -2103,7 +2042,7 @@ xfs_qm_shake_freelist(
 			/* XXX put a sentinel so that we can come back here */
 			xfs_dqfunlock(dqp);
 			xfs_dqunlock(dqp);
-			XFS_DQ_HASH_UNLOCK(hash);
+			mutex_unlock(&hash->qh_lock);
 			xfs_qm_freelist_unlock(xfs_Gqm);
 			if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
 				return nreclaimed;
@@ -2120,7 +2059,7 @@ xfs_qm_shake_freelist(
 		XQM_HASHLIST_REMOVE(hash, dqp);
 		xfs_dqfunlock(dqp);
 		xfs_qm_mplist_unlock(dqp->q_mount);
-		XFS_DQ_HASH_UNLOCK(hash);
+		mutex_unlock(&hash->qh_lock);
 
  off_freelist:
 		XQM_FREELIST_REMOVE(dqp);
@@ -2262,7 +2201,7 @@ xfs_qm_dqreclaim_one(void)
 			continue;
 		}
 
-		if (! xfs_qm_dqhashlock_nowait(dqp))
+		if (!mutex_trylock(&dqp->q_hash->qh_lock))
 			goto mplistunlock;
 
 		ASSERT(dqp->q_nrefs == 0);
@@ -2271,7 +2210,7 @@ xfs_qm_dqreclaim_one(void)
 		XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
 		XQM_FREELIST_REMOVE(dqp);
 		dqpout = dqp;
-		XFS_DQ_HASH_UNLOCK(dqp->q_hash);
+		mutex_unlock(&dqp->q_hash->qh_lock);
  mplistunlock:
 		xfs_qm_mplist_unlock(dqp->q_mount);
 		xfs_dqfunlock(dqp);
@@ -2774,34 +2713,3 @@ xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq)
 {
 	xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq);
 }
-
-STATIC int
-xfs_qm_dqhashlock_nowait(
-	xfs_dquot_t *dqp)
-{
-	int locked;
-
-	locked = mutex_trylock(&((dqp)->q_hash->qh_lock));
-	return locked;
-}
-
-int
-xfs_qm_freelist_lock_nowait(
-	xfs_qm_t *xqm)
-{
-	int locked;
-
-	locked = mutex_trylock(&(xqm->qm_dqfreelist.qh_lock));
-	return locked;
-}
-
-STATIC int
-xfs_qm_mplist_nowait(
-	xfs_mount_t	*mp)
-{
-	int locked;
-
-	ASSERT(mp->m_quotainfo);
-	locked = mutex_trylock(&(XFS_QI_MPLLOCK(mp)));
-	return locked;
-}
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index ddf0916..a371954 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -27,7 +27,7 @@ struct xfs_qm;
 struct xfs_inode;
 
 extern uint		ndquot;
-extern mutex_t		xfs_Gqm_lock;
+extern struct mutex	xfs_Gqm_lock;
 extern struct xfs_qm	*xfs_Gqm;
 extern kmem_zone_t	*qm_dqzone;
 extern kmem_zone_t	*qm_dqtrxzone;
@@ -79,7 +79,7 @@ typedef xfs_dqhash_t	xfs_dqlist_t;
 typedef struct xfs_frlist {
        struct xfs_dquot *qh_next;
        struct xfs_dquot *qh_prev;
-       mutex_t		 qh_lock;
+       struct mutex	 qh_lock;
        uint		 qh_version;
        uint		 qh_nelems;
 } xfs_frlist_t;
@@ -115,7 +115,7 @@ typedef struct xfs_quotainfo {
 	xfs_qwarncnt_t	 qi_bwarnlimit;	 /* limit for blks warnings */
 	xfs_qwarncnt_t	 qi_iwarnlimit;	 /* limit for inodes warnings */
 	xfs_qwarncnt_t	 qi_rtbwarnlimit;/* limit for rt blks warnings */
-	mutex_t		 qi_quotaofflock;/* to serialize quotaoff */
+	struct mutex	 qi_quotaofflock;/* to serialize quotaoff */
 	xfs_filblks_t	 qi_dqchunklen;	 /* # BBs in a chunk of dqs */
 	uint		 qi_dqperchunk;	 /* # ondisk dqs in above chunk */
 	xfs_qcnt_t	 qi_bhardlimit;	 /* default data blk hard limit */
@@ -158,11 +158,6 @@ typedef struct xfs_dquot_acct {
 #define XFS_QM_IWARNLIMIT	5
 #define XFS_QM_RTBWARNLIMIT	5
 
-#define XFS_QM_LOCK(xqm)	(mutex_lock(&xqm##_lock))
-#define XFS_QM_UNLOCK(xqm)	(mutex_unlock(&xqm##_lock))
-#define XFS_QM_HOLD(xqm)	((xqm)->qm_nrefs++)
-#define XFS_QM_RELE(xqm)	((xqm)->qm_nrefs--)
-
 extern void		xfs_qm_destroy_quotainfo(xfs_mount_t *);
 extern void		xfs_qm_mount_quotas(xfs_mount_t *);
 extern int		xfs_qm_quotacheck(xfs_mount_t *);
@@ -178,6 +173,16 @@ extern void		xfs_qm_dqdetach(xfs_inode_t *);
 extern int		xfs_qm_dqpurge_all(xfs_mount_t *, uint);
 extern void		xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
 
+/* quota ops */
+extern int		xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
+extern int		xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
+					fs_disk_quota_t *);
+extern int		xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint,
+					fs_disk_quota_t *);
+extern int		xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
+extern int		xfs_qm_scall_quotaon(xfs_mount_t *, uint);
+extern int		xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
+
 /* vop stuff */
 extern int		xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *,
 					uid_t, gid_t, prid_t, uint,
@@ -194,11 +199,6 @@ extern int		xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *,
 /* list stuff */
 extern void		xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *);
 extern void		xfs_qm_freelist_unlink(xfs_dquot_t *);
-extern int		xfs_qm_freelist_lock_nowait(xfs_qm_t *);
-
-/* system call interface */
-extern int		xfs_qm_quotactl(struct xfs_mount *, int, int,
-				xfs_caddr_t);
 
 #ifdef DEBUG
 extern int		xfs_qm_internalqcheck(xfs_mount_t *);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index bc6c5cc..63037c6 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -235,7 +235,6 @@ struct xfs_qmops xfs_qmcore_xfs = {
 	.xfs_dqvopchownresv	= xfs_qm_vop_chown_reserve,
 	.xfs_dqstatvfs		= xfs_qm_statvfs,
 	.xfs_dqsync		= xfs_qm_sync,
-	.xfs_quotactl		= xfs_qm_quotactl,
 	.xfs_dqtrxops		= &xfs_trans_dquot_ops,
 };
 EXPORT_SYMBOL(xfs_qmcore_xfs);
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 68139b3..c7b66f6 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -57,135 +57,16 @@
 # define qdprintk(s, args...)	do { } while (0)
 #endif
 
-STATIC int	xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
-STATIC int	xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
-					fs_disk_quota_t *);
-STATIC int	xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
-STATIC int	xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint,
-					fs_disk_quota_t *);
-STATIC int	xfs_qm_scall_quotaon(xfs_mount_t *, uint);
-STATIC int	xfs_qm_scall_quotaoff(xfs_mount_t *, uint, boolean_t);
 STATIC int	xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
 STATIC int	xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
 					uint);
-STATIC uint	xfs_qm_import_flags(uint);
 STATIC uint	xfs_qm_export_flags(uint);
-STATIC uint	xfs_qm_import_qtype_flags(uint);
 STATIC uint	xfs_qm_export_qtype_flags(uint);
 STATIC void	xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *,
 					fs_disk_quota_t *);
 
 
 /*
- * The main distribution switch of all XFS quotactl system calls.
- */
-int
-xfs_qm_quotactl(
-	xfs_mount_t	*mp,
-	int		cmd,
-	int		id,
-	xfs_caddr_t	addr)
-{
-	int		error;
-
-	ASSERT(addr != NULL || cmd == Q_XQUOTASYNC);
-
-	/*
-	 * The following commands are valid even when quotaoff.
-	 */
-	switch (cmd) {
-	case Q_XQUOTARM:
-		/*
-		 * Truncate quota files. quota must be off.
-		 */
-		if (XFS_IS_QUOTA_ON(mp))
-			return XFS_ERROR(EINVAL);
-		if (mp->m_flags & XFS_MOUNT_RDONLY)
-			return XFS_ERROR(EROFS);
-		return (xfs_qm_scall_trunc_qfiles(mp,
-			       xfs_qm_import_qtype_flags(*(uint *)addr)));
-
-	case Q_XGETQSTAT:
-		/*
-		 * Get quota status information.
-		 */
-		return (xfs_qm_scall_getqstat(mp, (fs_quota_stat_t *)addr));
-
-	case Q_XQUOTAON:
-		/*
-		 * QUOTAON - enabling quota enforcement.
-		 * Quota accounting must be turned on at mount time.
-		 */
-		if (mp->m_flags & XFS_MOUNT_RDONLY)
-			return XFS_ERROR(EROFS);
-		return (xfs_qm_scall_quotaon(mp,
-					  xfs_qm_import_flags(*(uint *)addr)));
-
-	case Q_XQUOTAOFF:
-		if (mp->m_flags & XFS_MOUNT_RDONLY)
-			return XFS_ERROR(EROFS);
-		break;
-
-	case Q_XQUOTASYNC:
-		return xfs_sync_inodes(mp, SYNC_DELWRI);
-
-	default:
-		break;
-	}
-
-	if (! XFS_IS_QUOTA_ON(mp))
-		return XFS_ERROR(ESRCH);
-
-	switch (cmd) {
-	case Q_XQUOTAOFF:
-		if (mp->m_flags & XFS_MOUNT_RDONLY)
-			return XFS_ERROR(EROFS);
-		error = xfs_qm_scall_quotaoff(mp,
-					    xfs_qm_import_flags(*(uint *)addr),
-					    B_FALSE);
-		break;
-
-	case Q_XGETQUOTA:
-		error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_USER,
-					(fs_disk_quota_t *)addr);
-		break;
-	case Q_XGETGQUOTA:
-		error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_GROUP,
-					(fs_disk_quota_t *)addr);
-		break;
-	case Q_XGETPQUOTA:
-		error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_PROJ,
-					(fs_disk_quota_t *)addr);
-		break;
-
-	case Q_XSETQLIM:
-		if (mp->m_flags & XFS_MOUNT_RDONLY)
-			return XFS_ERROR(EROFS);
-		error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_USER,
-					     (fs_disk_quota_t *)addr);
-		break;
-	case Q_XSETGQLIM:
-		if (mp->m_flags & XFS_MOUNT_RDONLY)
-			return XFS_ERROR(EROFS);
-		error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_GROUP,
-					     (fs_disk_quota_t *)addr);
-		break;
-	case Q_XSETPQLIM:
-		if (mp->m_flags & XFS_MOUNT_RDONLY)
-			return XFS_ERROR(EROFS);
-		error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_PROJ,
-					     (fs_disk_quota_t *)addr);
-		break;
-
-	default:
-		error = XFS_ERROR(EINVAL);
-		break;
-	}
-
-	return (error);
-}
-
-/*
  * Turn off quota accounting and/or enforcement for all udquots and/or
  * gdquots. Called only at unmount time.
  *
@@ -193,11 +74,10 @@ xfs_qm_quotactl(
  * incore, and modifies the ondisk dquot directly. Therefore, for example,
  * it is an error to call this twice, without purging the cache.
  */
-STATIC int
+int
 xfs_qm_scall_quotaoff(
 	xfs_mount_t		*mp,
-	uint			flags,
-	boolean_t		force)
+	uint			flags)
 {
 	uint			dqtype;
 	int			error;
@@ -205,8 +85,6 @@ xfs_qm_scall_quotaoff(
 	xfs_qoff_logitem_t	*qoffstart;
 	int			nculprits;
 
-	if (!force && !capable(CAP_SYS_ADMIN))
-		return XFS_ERROR(EPERM);
 	/*
 	 * No file system can have quotas enabled on disk but not in core.
 	 * Note that quota utilities (like quotaoff) _expect_
@@ -375,7 +253,7 @@ out_error:
 	return (error);
 }
 
-STATIC int
+int
 xfs_qm_scall_trunc_qfiles(
 	xfs_mount_t	*mp,
 	uint		flags)
@@ -383,8 +261,6 @@ xfs_qm_scall_trunc_qfiles(
 	int		error = 0, error2 = 0;
 	xfs_inode_t	*qip;
 
-	if (!capable(CAP_SYS_ADMIN))
-		return XFS_ERROR(EPERM);
 	if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
 		qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags);
 		return XFS_ERROR(EINVAL);
@@ -416,7 +292,7 @@ xfs_qm_scall_trunc_qfiles(
  * effect immediately.
  * (Switching on quota accounting must be done at mount time.)
  */
-STATIC int
+int
 xfs_qm_scall_quotaon(
 	xfs_mount_t	*mp,
 	uint		flags)
@@ -426,9 +302,6 @@ xfs_qm_scall_quotaon(
 	uint		accflags;
 	__int64_t	sbflags;
 
-	if (!capable(CAP_SYS_ADMIN))
-		return XFS_ERROR(EPERM);
-
 	flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
 	/*
 	 * Switching on quota accounting must be done at mount time.
@@ -517,7 +390,7 @@ xfs_qm_scall_quotaon(
 /*
  * Return quota status information, such as uquota-off, enforcements, etc.
  */
-STATIC int
+int
 xfs_qm_scall_getqstat(
 	xfs_mount_t	*mp,
 	fs_quota_stat_t *out)
@@ -582,7 +455,7 @@ xfs_qm_scall_getqstat(
 /*
  * Adjust quota limits, and start/stop timers accordingly.
  */
-STATIC int
+int
 xfs_qm_scall_setqlim(
 	xfs_mount_t		*mp,
 	xfs_dqid_t		id,
@@ -595,9 +468,6 @@ xfs_qm_scall_setqlim(
 	int			error;
 	xfs_qcnt_t		hard, soft;
 
-	if (!capable(CAP_SYS_ADMIN))
-		return XFS_ERROR(EPERM);
-
 	if ((newlim->d_fieldmask &
 	    (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0)
 		return (0);
@@ -742,7 +612,7 @@ xfs_qm_scall_setqlim(
 	return error;
 }
 
-STATIC int
+int
 xfs_qm_scall_getquota(
 	xfs_mount_t	*mp,
 	xfs_dqid_t	id,
@@ -935,30 +805,6 @@ xfs_qm_export_dquot(
 }
 
 STATIC uint
-xfs_qm_import_qtype_flags(
-	uint		uflags)
-{
-	uint		oflags = 0;
-
-	/*
-	 * Can't be more than one, or none.
-	 */
-	if (((uflags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) ==
-			(XFS_GROUP_QUOTA | XFS_USER_QUOTA)) ||
-	    ((uflags & (XFS_GROUP_QUOTA | XFS_PROJ_QUOTA)) ==
-			(XFS_GROUP_QUOTA | XFS_PROJ_QUOTA)) ||
-	    ((uflags & (XFS_USER_QUOTA | XFS_PROJ_QUOTA)) ==
-			(XFS_USER_QUOTA | XFS_PROJ_QUOTA)) ||
-	    ((uflags & (XFS_GROUP_QUOTA|XFS_USER_QUOTA|XFS_PROJ_QUOTA)) == 0))
-		return (0);
-
-	oflags |= (uflags & XFS_USER_QUOTA) ? XFS_DQ_USER : 0;
-	oflags |= (uflags & XFS_PROJ_QUOTA) ? XFS_DQ_PROJ : 0;
-	oflags |= (uflags & XFS_GROUP_QUOTA) ? XFS_DQ_GROUP: 0;
-	return oflags;
-}
-
-STATIC uint
 xfs_qm_export_qtype_flags(
 	uint flags)
 {
@@ -979,26 +825,6 @@ xfs_qm_export_qtype_flags(
 }
 
 STATIC uint
-xfs_qm_import_flags(
-	uint uflags)
-{
-	uint flags = 0;
-
-	if (uflags & XFS_QUOTA_UDQ_ACCT)
-		flags |= XFS_UQUOTA_ACCT;
-	if (uflags & XFS_QUOTA_PDQ_ACCT)
-		flags |= XFS_PQUOTA_ACCT;
-	if (uflags & XFS_QUOTA_GDQ_ACCT)
-		flags |= XFS_GQUOTA_ACCT;
-	if (uflags & XFS_QUOTA_UDQ_ENFD)
-		flags |= XFS_UQUOTA_ENFD;
-	if (uflags & (XFS_QUOTA_PDQ_ENFD|XFS_QUOTA_GDQ_ENFD))
-		flags |= XFS_OQUOTA_ENFD;
-	return (flags);
-}
-
-
-STATIC uint
 xfs_qm_export_flags(
 	uint flags)
 {
@@ -1134,7 +960,7 @@ xfs_dqhash_t *qmtest_udqtab;
 xfs_dqhash_t *qmtest_gdqtab;
 int	      qmtest_hashmask;
 int	      qmtest_nfails;
-mutex_t	      qcheck_lock;
+struct mutex  qcheck_lock;
 
 #define DQTEST_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
 				 (__psunsigned_t)(id)) & \
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index c4fcea6..8286b28 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -42,34 +42,24 @@
 #define XFS_QI_QOFFLOCK(mp)	((mp)->m_quotainfo->qi_quotaofflock)
 
 #define XFS_QI_MPL_LIST(mp)	((mp)->m_quotainfo->qi_dqlist)
-#define XFS_QI_MPLLOCK(mp)	((mp)->m_quotainfo->qi_dqlist.qh_lock)
 #define XFS_QI_MPLNEXT(mp)	((mp)->m_quotainfo->qi_dqlist.qh_next)
 #define XFS_QI_MPLNDQUOTS(mp)	((mp)->m_quotainfo->qi_dqlist.qh_nelems)
 
-#define XQMLCK(h)			(mutex_lock(&((h)->qh_lock)))
-#define XQMUNLCK(h)			(mutex_unlock(&((h)->qh_lock)))
-#ifdef DEBUG
-struct xfs_dqhash;
-static inline int XQMISLCKD(struct xfs_dqhash *h)
-{
-	if (mutex_trylock(&h->qh_lock)) {
-		mutex_unlock(&h->qh_lock);
-		return 0;
-	}
-	return 1;
-}
-#endif
-
-#define XFS_DQ_HASH_LOCK(h)		XQMLCK(h)
-#define XFS_DQ_HASH_UNLOCK(h)		XQMUNLCK(h)
-#define XFS_DQ_IS_HASH_LOCKED(h)	XQMISLCKD(h)
-
-#define xfs_qm_mplist_lock(mp)		XQMLCK(&(XFS_QI_MPL_LIST(mp)))
-#define xfs_qm_mplist_unlock(mp)	XQMUNLCK(&(XFS_QI_MPL_LIST(mp)))
-#define XFS_QM_IS_MPLIST_LOCKED(mp)	XQMISLCKD(&(XFS_QI_MPL_LIST(mp)))
-
-#define xfs_qm_freelist_lock(qm)	XQMLCK(&((qm)->qm_dqfreelist))
-#define xfs_qm_freelist_unlock(qm)	XQMUNLCK(&((qm)->qm_dqfreelist))
+#define xfs_qm_mplist_lock(mp) \
+	mutex_lock(&(XFS_QI_MPL_LIST(mp).qh_lock))
+#define xfs_qm_mplist_nowait(mp) \
+	mutex_trylock(&(XFS_QI_MPL_LIST(mp).qh_lock))
+#define xfs_qm_mplist_unlock(mp) \
+	mutex_unlock(&(XFS_QI_MPL_LIST(mp).qh_lock))
+#define XFS_QM_IS_MPLIST_LOCKED(mp) \
+	mutex_is_locked(&(XFS_QI_MPL_LIST(mp).qh_lock))
+
+#define xfs_qm_freelist_lock(qm) \
+	mutex_lock(&((qm)->qm_dqfreelist.qh_lock))
+#define xfs_qm_freelist_lock_nowait(qm) \
+	mutex_trylock(&((qm)->qm_dqfreelist.qh_lock))
+#define xfs_qm_freelist_unlock(qm) \
+	mutex_unlock(&((qm)->qm_dqfreelist.qh_lock))
 
 /*
  * Hash into a bucket in the dquot hash table, based on <mp, id>.
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 9961138..447173b 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -624,10 +624,9 @@ xfs_trans_dqresv(
 	xfs_qcnt_t	*resbcountp;
 	xfs_quotainfo_t	*q = mp->m_quotainfo;
 
-	if (! (flags & XFS_QMOPT_DQLOCK)) {
-		xfs_dqlock(dqp);
-	}
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+	xfs_dqlock(dqp);
+
 	if (flags & XFS_TRANS_DQ_RES_BLKS) {
 		hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
 		if (!hardlimit)
@@ -740,10 +739,8 @@ xfs_trans_dqresv(
 	ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount));
 
 error_return:
-	if (! (flags & XFS_QMOPT_DQLOCK)) {
-		xfs_dqunlock(dqp);
-	}
-	return (error);
+	xfs_dqunlock(dqp);
+	return error;
 }
 
 
@@ -753,8 +750,7 @@ error_return:
  * grp/prj quotas is important, because this follows a both-or-nothing
  * approach.
  *
- * flags = XFS_QMOPT_DQLOCK indicate if dquot(s) need to be locked.
- *	   XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
+ * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
  *	   XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT.  Used by pquota.
  *	   XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks
  *	   XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index ae54829..3f3610a 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -24,6 +24,7 @@
 #include "xfs_ag.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
+#include "xfs_error.h"
 
 static char		message[1024];	/* keep it off the stack */
 static DEFINE_SPINLOCK(xfs_err_lock);
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
index 5830c04..b83f76b 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/support/uuid.c
@@ -17,10 +17,6 @@
  */
 #include <xfs.h>
 
-static DEFINE_MUTEX(uuid_monitor);
-static int	uuid_table_size;
-static uuid_t	*uuid_table;
-
 /* IRIX interpretation of an uuid_t */
 typedef struct {
 	__be32	uu_timelow;
@@ -46,12 +42,6 @@ uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
 	fsid[1] = be32_to_cpu(uup->uu_timelow);
 }
 
-void
-uuid_create_nil(uuid_t *uuid)
-{
-	memset(uuid, 0, sizeof(*uuid));
-}
-
 int
 uuid_is_nil(uuid_t *uuid)
 {
@@ -71,64 +61,3 @@ uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
 {
 	return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1;
 }
-
-/*
- * Given a 128-bit uuid, return a 64-bit value by adding the top and bottom
- * 64-bit words.  NOTE: This function can not be changed EVER.  Although
- * brain-dead, some applications depend on this 64-bit value remaining
- * persistent.  Specifically, DMI vendors store the value as a persistent
- * filehandle.
- */
-__uint64_t
-uuid_hash64(uuid_t *uuid)
-{
-	__uint64_t	*sp = (__uint64_t *)uuid;
-
-	return sp[0] + sp[1];
-}
-
-int
-uuid_table_insert(uuid_t *uuid)
-{
-	int	i, hole;
-
-	mutex_lock(&uuid_monitor);
-	for (i = 0, hole = -1; i < uuid_table_size; i++) {
-		if (uuid_is_nil(&uuid_table[i])) {
-			hole = i;
-			continue;
-		}
-		if (uuid_equal(uuid, &uuid_table[i])) {
-			mutex_unlock(&uuid_monitor);
-			return 0;
-		}
-	}
-	if (hole < 0) {
-		uuid_table = kmem_realloc(uuid_table,
-			(uuid_table_size + 1) * sizeof(*uuid_table),
-			uuid_table_size  * sizeof(*uuid_table),
-			KM_SLEEP);
-		hole = uuid_table_size++;
-	}
-	uuid_table[hole] = *uuid;
-	mutex_unlock(&uuid_monitor);
-	return 1;
-}
-
-void
-uuid_table_remove(uuid_t *uuid)
-{
-	int	i;
-
-	mutex_lock(&uuid_monitor);
-	for (i = 0; i < uuid_table_size; i++) {
-		if (uuid_is_nil(&uuid_table[i]))
-			continue;
-		if (!uuid_equal(uuid, &uuid_table[i]))
-			continue;
-		uuid_create_nil(&uuid_table[i]);
-		break;
-	}
-	ASSERT(i < uuid_table_size);
-	mutex_unlock(&uuid_monitor);
-}
diff --git a/fs/xfs/support/uuid.h b/fs/xfs/support/uuid.h
index cff5b60..4732d71 100644
--- a/fs/xfs/support/uuid.h
+++ b/fs/xfs/support/uuid.h
@@ -22,12 +22,8 @@ typedef struct {
 	unsigned char	__u_bits[16];
 } uuid_t;
 
-extern void uuid_create_nil(uuid_t *uuid);
 extern int uuid_is_nil(uuid_t *uuid);
 extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
 extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
-extern __uint64_t uuid_hash64(uuid_t *uuid);
-extern int uuid_table_insert(uuid_t *uuid);
-extern void uuid_table_remove(uuid_t *uuid);
 
 #endif	/* __XFS_SUPPORT_UUID_H__ */
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 143d63e..c8641f7 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -223,8 +223,8 @@ typedef struct xfs_perag
 		be32_to_cpu((a)->agf_levels[XFS_BTNUM_CNTi]), mp))
 #define	XFS_MIN_FREELIST_PAG(pag,mp)	\
 	(XFS_MIN_FREELIST_RAW(		\
-		(uint_t)(pag)->pagf_levels[XFS_BTNUM_BNOi], \
-		(uint_t)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp))
+		(unsigned int)(pag)->pagf_levels[XFS_BTNUM_BNOi], \
+		(unsigned int)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp))
 
 #define XFS_AGB_TO_FSB(mp,agno,agbno)	\
 	(((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno))
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 028e44e..2cf944e 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1872,6 +1872,25 @@ xfs_alloc_compute_maxlevels(
 }
 
 /*
+ * Find the longest free extent in an AG, less any freelist shortfall.
+ */
+xfs_extlen_t
+xfs_alloc_longest_free_extent(
+	struct xfs_mount	*mp,
+	struct xfs_perag	*pag)
+{
+	xfs_extlen_t		need, delta = 0;
+
+	need = XFS_MIN_FREELIST_PAG(pag, mp);	/* min freelist length */
+	if (need > pag->pagf_flcount)
+		delta = need - pag->pagf_flcount;	/* shortfall */
+
+	if (pag->pagf_longest > delta)
+		return pag->pagf_longest - delta;
+	return pag->pagf_flcount > 0 || pag->pagf_longest > 0;	/* 0 or 1 */
+}
+
+/*
  * Decide whether to use this allocation group for this allocation.
  * If so, fix up the btree freelist's size.
  */
@@ -1923,15 +1942,12 @@ xfs_alloc_fix_freelist(
 	}
 
 	if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
-		need = XFS_MIN_FREELIST_PAG(pag, mp);
-		delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
 		/*
 		 * If it looks like there isn't a long enough extent, or enough
 		 * total blocks, reject it.
 		 */
-		longest = (pag->pagf_longest > delta) ?
-			(pag->pagf_longest - delta) :
-			(pag->pagf_flcount > 0 || pag->pagf_longest > 0);
+		need = XFS_MIN_FREELIST_PAG(pag, mp);
+		longest = xfs_alloc_longest_free_extent(mp, pag);
 		if ((args->minlen + args->alignment + args->minalignslop - 1) >
 				longest ||
 		    ((int)(pag->pagf_freeblks + pag->pagf_flcount -
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 5881727..e704caee 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -100,6 +100,12 @@ typedef struct xfs_alloc_arg {
 #define XFS_ALLOC_USERDATA		1	/* allocation is for user data*/
 #define XFS_ALLOC_INITIAL_USER_DATA	2	/* special case start of file */
 
+/*
+ * Find the length of the longest extent in an AG.
+ */
+xfs_extlen_t
+xfs_alloc_longest_free_extent(struct xfs_mount *mp,
+		struct xfs_perag *pag);
 
 #ifdef __KERNEL__
 
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 6c323f8..afdc891 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -155,7 +155,8 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
 		 * minimum offset only needs to be the space required for 
 		 * the btree root.
 		 */ 
-		if (!dp->i_d.di_forkoff && dp->i_df.if_bytes > mp->m_attroffset)
+		if (!dp->i_d.di_forkoff && dp->i_df.if_bytes >
+		    xfs_default_attroffset(dp))
 			dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
 		break;
 		
@@ -298,6 +299,26 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
 }
 
 /*
+ * After the last attribute is removed, revert to the original inode format,
+ * making the whole literal area available to the data fork once more.
+ */
+STATIC void
+xfs_attr_fork_reset(
+	struct xfs_inode	*ip,		/* inode losing its attr fork */
+	struct xfs_trans	*tp)		/* transaction logging the inode */
+{
+	xfs_idestroy_fork(ip, XFS_ATTR_FORK);
+	ip->i_d.di_forkoff = 0;		/* no attr fork any more */
+	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+
+	ASSERT(ip->i_d.di_anextents == 0);
+	ASSERT(ip->i_afp == NULL);
+
+	ip->i_df.if_ext_max = XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t);
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+}
+
+/*
  * Remove an attribute from the shortform attribute list structure.
  */
 int
@@ -344,22 +365,10 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
 	 */
 	totsize -= size;
 	if (totsize == sizeof(xfs_attr_sf_hdr_t) &&
-				!(args->op_flags & XFS_DA_OP_ADDNAME) &&
-				(mp->m_flags & XFS_MOUNT_ATTR2) &&
-				(dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) {
-		/*
-		 * Last attribute now removed, revert to original
-		 * inode format making all literal area available
-		 * to the data fork once more.
-		 */
-		xfs_idestroy_fork(dp, XFS_ATTR_FORK);
-		dp->i_d.di_forkoff = 0;
-		dp->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
-		ASSERT(dp->i_d.di_anextents == 0);
-		ASSERT(dp->i_afp == NULL);
-		dp->i_df.if_ext_max =
-			XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
-		xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
+	    (mp->m_flags & XFS_MOUNT_ATTR2) &&
+	    (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
+	    !(args->op_flags & XFS_DA_OP_ADDNAME)) {
+		xfs_attr_fork_reset(dp, args->trans);
 	} else {
 		xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
 		dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
@@ -786,20 +795,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
 	if (forkoff == -1) {
 		ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
 		ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE);
-
-		/*
-		 * Last attribute was removed, revert to original
-		 * inode format making all literal area available
-		 * to the data fork once more.
-		 */
-		xfs_idestroy_fork(dp, XFS_ATTR_FORK);
-		dp->i_d.di_forkoff = 0;
-		dp->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
-		ASSERT(dp->i_d.di_anextents == 0);
-		ASSERT(dp->i_afp == NULL);
-		dp->i_df.if_ext_max =
-			XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
-		xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
+		xfs_attr_fork_reset(dp, args->trans);
 		goto out;
 	}
 
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index c852cd6..3a6ed42 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2479,7 +2479,7 @@ xfs_bmap_adjacent(
 	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
 	/*
 	 * If allocating at eof, and there's a previous real block,
-	 * try to use it's last block as our starting point.
+	 * try to use its last block as our starting point.
 	 */
 	if (ap->eof && ap->prevp->br_startoff != NULLFILEOFF &&
 	    !isnullstartblock(ap->prevp->br_startblock) &&
@@ -2712,9 +2712,6 @@ xfs_bmap_btalloc(
 	xfs_agnumber_t	startag;
 	xfs_alloc_arg_t	args;
 	xfs_extlen_t	blen;
-	xfs_extlen_t	delta;
-	xfs_extlen_t	longest;
-	xfs_extlen_t	need;
 	xfs_extlen_t	nextminlen = 0;
 	xfs_perag_t	*pag;
 	int		nullfb;		/* true if ap->firstblock isn't set */
@@ -2796,13 +2793,8 @@ xfs_bmap_btalloc(
 			 * See xfs_alloc_fix_freelist...
 			 */
 			if (pag->pagf_init) {
-				need = XFS_MIN_FREELIST_PAG(pag, mp);
-				delta = need > pag->pagf_flcount ?
-					need - pag->pagf_flcount : 0;
-				longest = (pag->pagf_longest > delta) ?
-					(pag->pagf_longest - delta) :
-					(pag->pagf_flcount > 0 ||
-					 pag->pagf_longest > 0);
+				xfs_extlen_t	longest;
+				longest = xfs_alloc_longest_free_extent(mp, pag);
 				if (blen < longest)
 					blen = longest;
 			} else
@@ -3577,6 +3569,27 @@ xfs_bmap_extents_to_btree(
 }
 
 /*
+ * Calculate the default attribute fork offset for newly created inodes.
+ */
+uint
+xfs_default_attroffset(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	uint			offset;		/* in bytes */
+
+	if (mp->m_sb.sb_inodesize == 256) {
+		offset = XFS_LITINO(mp) -
+				XFS_BMDR_SPACE_CALC(MINABTPTRS);
+	} else {
+		offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
+	}
+
+	ASSERT(offset < XFS_LITINO(mp));	/* within literal area */
+	return offset;		/* callers use (offset >> 3) as di_forkoff */
+}
+
+/*
  * Helper routine to reset inode di_forkoff field when switching
  * attribute fork from local to extent format - we reset it where
  * possible to make space available for inline data fork extents.
@@ -3588,15 +3601,18 @@ xfs_bmap_forkoff_reset(
 	int		whichfork)
 {
 	if (whichfork == XFS_ATTR_FORK &&
-	    (ip->i_d.di_format != XFS_DINODE_FMT_DEV) &&
-	    (ip->i_d.di_format != XFS_DINODE_FMT_UUID) &&
-	    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
-	    ((mp->m_attroffset >> 3) > ip->i_d.di_forkoff)) {
-		ip->i_d.di_forkoff = mp->m_attroffset >> 3;
-		ip->i_df.if_ext_max = XFS_IFORK_DSIZE(ip) /
-					(uint)sizeof(xfs_bmbt_rec_t);
-		ip->i_afp->if_ext_max = XFS_IFORK_ASIZE(ip) /
-					(uint)sizeof(xfs_bmbt_rec_t);
+	    ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
+	    ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
+	    ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
+		uint	dfl_forkoff = xfs_default_attroffset(ip) >> 3;
+
+		if (dfl_forkoff > ip->i_d.di_forkoff) {
+			ip->i_d.di_forkoff = dfl_forkoff;
+			ip->i_df.if_ext_max =
+				XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t);
+			ip->i_afp->if_ext_max =
+				XFS_IFORK_ASIZE(ip) / sizeof(xfs_bmbt_rec_t);
+		}
 	}
 }
 
@@ -4065,7 +4081,7 @@ xfs_bmap_add_attrfork(
 	case XFS_DINODE_FMT_BTREE:
 		ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
 		if (!ip->i_d.di_forkoff)
-			ip->i_d.di_forkoff = mp->m_attroffset >> 3;
+			ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
 		else if (mp->m_flags & XFS_MOUNT_ATTR2)
 			version = 2;
 		break;
@@ -4212,12 +4228,12 @@ xfs_bmap_compute_maxlevels(
 	 * (a signed 16-bit number, xfs_aextnum_t).
 	 *
 	 * Note that we can no longer assume that if we are in ATTR1 that
-	 * the fork offset of all the inodes will be (m_attroffset >> 3)
-	 * because we could have mounted with ATTR2 and then mounted back
-	 * with ATTR1, keeping the di_forkoff's fixed but probably at
-	 * various positions. Therefore, for both ATTR1 and ATTR2
-	 * we have to assume the worst case scenario of a minimum size
-	 * available.
+	 * the fork offset of all the inodes will be
+	 * (xfs_default_attroffset(ip) >> 3) because we could have mounted
+	 * with ATTR2 and then mounted back with ATTR1, keeping the
+	 * di_forkoff's fixed but probably at various positions. Therefore,
+	 * for both ATTR1 and ATTR2 we have to assume the worst case scenario
+	 * of a minimum size available.
 	 */
 	if (whichfork == XFS_DATA_FORK) {
 		maxleafents = MAXEXTNUM;
@@ -4804,7 +4820,7 @@ xfs_bmapi(
 	xfs_extlen_t	minlen;		/* min allocation size */
 	xfs_mount_t	*mp;		/* xfs mount structure */
 	int		n;		/* current extent index */
-	int		nallocs;	/* number of extents alloc\'d */
+	int		nallocs;	/* number of extents alloc'd */
 	xfs_extnum_t	nextents;	/* number of extents in file */
 	xfs_fileoff_t	obno;		/* old block number (offset) */
 	xfs_bmbt_irec_t	prev;		/* previous file extent record */
@@ -6204,7 +6220,7 @@ xfs_bmap_get_bp(
 	return(bp);
 }
 
-void
+STATIC void
 xfs_check_block(
 	struct xfs_btree_block	*block,
 	xfs_mount_t		*mp,
@@ -6494,7 +6510,7 @@ xfs_bmap_count_tree(
 	block = XFS_BUF_TO_BLOCK(bp);
 
 	if (--level) {
-		/* Not at node above leafs, count this level of nodes */
+		/* Not at node above leaves, count this level of nodes */
 		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
 		while (nextbno != NULLFSBLOCK) {
 			if ((error = xfs_btree_read_bufl(mp, tp, nextbno,
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index be2979d..1b8ff92 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -125,7 +125,7 @@ typedef struct xfs_bmalloca {
 	struct xfs_bmbt_irec	*gotp;	/* extent after, or delayed */
 	xfs_extlen_t		alen;	/* i/o length asked/allocated */
 	xfs_extlen_t		total;	/* total blocks needed for xaction */
-	xfs_extlen_t		minlen;	/* mininum allocation size (blocks) */
+	xfs_extlen_t		minlen;	/* minimum allocation size (blocks) */
 	xfs_extlen_t		minleft; /* amount must be left after alloc */
 	char			eof;	/* set if allocating past last extent */
 	char			wasdel;	/* replacing a delayed allocation */
@@ -338,6 +338,10 @@ xfs_check_nostate_extents(
 	xfs_extnum_t		idx,
 	xfs_extnum_t		num);
 
+uint
+xfs_default_attroffset(
+	struct xfs_inode	*ip);
+
 #ifdef __KERNEL__
 
 /*
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index e73c332..e9df995 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -1883,7 +1883,7 @@ xfs_btree_lshift(
 
 	/*
 	 * We add one entry to the left side and remove one for the right side.
-	 * Accout for it here, the changes will be updated on disk and logged
+	 * Account for it here, the changes will be updated on disk and logged
 	 * later.
 	 */
 	lrecs++;
@@ -3535,7 +3535,7 @@ xfs_btree_delrec(
 	XFS_BTREE_STATS_INC(cur, join);
 
 	/*
-	 * Fix up the the number of records and right block pointer in the
+	 * Fix up the number of records and right block pointer in the
 	 * surviving block, and log it.
 	 */
 	xfs_btree_set_numrecs(left, lrecs + rrecs);
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 789fffd..4f852b7 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -41,7 +41,7 @@ extern kmem_zone_t	*xfs_btree_cur_zone;
 /*
  * Generic btree header.
  *
- * This is a comination of the actual format used on disk for short and long
+ * This is a combination of the actual format used on disk for short and long
  * format btrees.  The first three fields are shared by both format, but
  * the pointers are different and should be used with care.
  *
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index c45f74f..9ff6e57 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1503,7 +1503,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
  * This is implemented with some source-level loop unrolling.
  */
 xfs_dahash_t
-xfs_da_hashname(const uchar_t *name, int namelen)
+xfs_da_hashname(const __uint8_t *name, int namelen)
 {
 	xfs_dahash_t hash;
 
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 70b710c..8c53616 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -91,9 +91,9 @@ enum xfs_dacmp {
  * Structure to ease passing around component names.
  */
 typedef struct xfs_da_args {
-	const uchar_t	*name;		/* string (maybe not NULL terminated) */
+	const __uint8_t	*name;		/* string (maybe not NULL terminated) */
 	int		namelen;	/* length of string (maybe no NULL) */
-	uchar_t		*value;		/* set of bytes (maybe contain NULLs) */
+	__uint8_t	*value;		/* set of bytes (maybe contain NULLs) */
 	int		valuelen;	/* length of value */
 	int		flags;		/* argument flags (eg: ATTR_NOCREATE) */
 	xfs_dahash_t	hashval;	/* hash value of name */
@@ -185,7 +185,7 @@ typedef struct xfs_da_state {
 	unsigned char		inleaf;		/* insert into 1->lf, 0->splf */
 	unsigned char		extravalid;	/* T/F: extrablk is in use */
 	unsigned char		extraafter;	/* T/F: extrablk is after new */
-	xfs_da_state_blk_t	extrablk;	/* for double-splits on leafs */
+	xfs_da_state_blk_t	extrablk;	/* for double-splits on leaves */
 						/* for dirv2 extrablk is data */
 } xfs_da_state_t;
 
@@ -251,7 +251,7 @@ xfs_daddr_t	xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp,
 int	xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
 					  xfs_dabuf_t *dead_buf);
 
-uint xfs_da_hashname(const uchar_t *name_string, int name_length);
+uint xfs_da_hashname(const __uint8_t *name_string, int name_length);
 enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
 				const char *name, int len);
 
@@ -268,5 +268,6 @@ xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf);
 
 extern struct kmem_zone *xfs_da_state_zone;
 extern struct kmem_zone *xfs_dabuf_zone;
+extern const struct xfs_nameops xfs_default_nameops;
 
 #endif	/* __XFS_DA_BTREE_H__ */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index f8278cf..e6d839b 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -79,6 +79,12 @@ xfs_swapext(
 		goto out_put_target_file;
 	}
 
+	if (IS_SWAPFILE(file->f_path.dentry->d_inode) ||
+	    IS_SWAPFILE(target_file->f_path.dentry->d_inode)) {
+		error = XFS_ERROR(EINVAL);
+		goto out_put_target_file;
+	}
+
 	ip = XFS_I(file->f_path.dentry->d_inode);
 	tip = XFS_I(target_file->f_path.dentry->d_inode);
 
@@ -118,19 +124,17 @@ xfs_swap_extents(
 	xfs_bstat_t	*sbp = &sxp->sx_stat;
 	xfs_ifork_t	*tempifp, *ifp, *tifp;
 	int		ilf_fields, tilf_fields;
-	static uint	lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
 	int		error = 0;
 	int		aforkblks = 0;
 	int		taforkblks = 0;
 	__uint64_t	tmp;
-	char		locked = 0;
 
 	mp = ip->i_mount;
 
 	tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
 	if (!tempifp) {
 		error = XFS_ERROR(ENOMEM);
-		goto error0;
+		goto out;
 	}
 
 	sbp = &sxp->sx_stat;
@@ -143,25 +147,24 @@ xfs_swap_extents(
 	 */
 	xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
 	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
-	locked = 1;
 
 	/* Verify that both files have the same format */
 	if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
 		error = XFS_ERROR(EINVAL);
-		goto error0;
+		goto out_unlock;
 	}
 
 	/* Verify both files are either real-time or non-realtime */
 	if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
 		error = XFS_ERROR(EINVAL);
-		goto error0;
+		goto out_unlock;
 	}
 
 	/* Should never get a local format */
 	if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
 	    tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
 		error = XFS_ERROR(EINVAL);
-		goto error0;
+		goto out_unlock;
 	}
 
 	if (VN_CACHED(VFS_I(tip)) != 0) {
@@ -169,13 +172,13 @@ xfs_swap_extents(
 		error = xfs_flushinval_pages(tip, 0, -1,
 				FI_REMAPF_LOCKED);
 		if (error)
-			goto error0;
+			goto out_unlock;
 	}
 
 	/* Verify O_DIRECT for ftmp */
 	if (VN_CACHED(VFS_I(tip)) != 0) {
 		error = XFS_ERROR(EINVAL);
-		goto error0;
+		goto out_unlock;
 	}
 
 	/* Verify all data are being swapped */
@@ -183,7 +186,7 @@ xfs_swap_extents(
 	    sxp->sx_length != ip->i_d.di_size ||
 	    sxp->sx_length != tip->i_d.di_size) {
 		error = XFS_ERROR(EFAULT);
-		goto error0;
+		goto out_unlock;
 	}
 
 	/*
@@ -193,7 +196,7 @@ xfs_swap_extents(
 	 */
 	if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) {
 		error = XFS_ERROR(EINVAL);
-		goto error0;
+		goto out_unlock;
 	}
 
 	/*
@@ -208,7 +211,7 @@ xfs_swap_extents(
 	    (sbp->bs_mtime.tv_sec != ip->i_d.di_mtime.t_sec) ||
 	    (sbp->bs_mtime.tv_nsec != ip->i_d.di_mtime.t_nsec)) {
 		error = XFS_ERROR(EBUSY);
-		goto error0;
+		goto out_unlock;
 	}
 
 	/* We need to fail if the file is memory mapped.  Once we have tossed
@@ -219,7 +222,7 @@ xfs_swap_extents(
 	 */
 	if (VN_MAPPED(VFS_I(ip))) {
 		error = XFS_ERROR(EBUSY);
-		goto error0;
+		goto out_unlock;
 	}
 
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -242,8 +245,7 @@ xfs_swap_extents(
 		xfs_iunlock(ip,  XFS_IOLOCK_EXCL);
 		xfs_iunlock(tip, XFS_IOLOCK_EXCL);
 		xfs_trans_cancel(tp, 0);
-		locked = 0;
-		goto error0;
+		goto out;
 	}
 	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
 
@@ -253,19 +255,15 @@ xfs_swap_extents(
 	if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
 	     (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
 		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks);
-		if (error) {
-			xfs_trans_cancel(tp, 0);
-			goto error0;
-		}
+		if (error)
+			goto out_trans_cancel;
 	}
 	if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
 	     (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
 		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
 			&taforkblks);
-		if (error) {
-			xfs_trans_cancel(tp, 0);
-			goto error0;
-		}
+		if (error)
+			goto out_trans_cancel;
 	}
 
 	/*
@@ -332,10 +330,10 @@ xfs_swap_extents(
 
 
 	IHOLD(ip);
-	xfs_trans_ijoin(tp, ip, lock_flags);
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
 
 	IHOLD(tip);
-	xfs_trans_ijoin(tp, tip, lock_flags);
+	xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
 
 	xfs_trans_log_inode(tp, ip,  ilf_fields);
 	xfs_trans_log_inode(tp, tip, tilf_fields);
@@ -344,19 +342,25 @@ xfs_swap_extents(
 	 * If this is a synchronous mount, make sure that the
 	 * transaction goes to disk before returning to the user.
 	 */
-	if (mp->m_flags & XFS_MOUNT_WSYNC) {
+	if (mp->m_flags & XFS_MOUNT_WSYNC)
 		xfs_trans_set_sync(tp);
-	}
 
 	error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT);
-	locked = 0;
 
- error0:
-	if (locked) {
-		xfs_iunlock(ip,  lock_flags);
-		xfs_iunlock(tip, lock_flags);
-	}
-	if (tempifp != NULL)
-		kmem_free(tempifp);
+out:
+	kmem_free(tempifp);
 	return error;
+
+	/*
+	 * Reached only from failures before the inodes are joined to the
+	 * transaction; once joined, the locks must not be dropped here.
+	 */
+out_unlock:
+	xfs_iunlock(ip,  XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+	xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+	goto out;
+
+out_trans_cancel:
+	xfs_trans_cancel(tp, 0);
+	goto out_unlock;
 }
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index 162e872..e5b153b 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -103,7 +103,9 @@ typedef enum xfs_dinode_fmt {
 /*
  * Inode size for given fs.
  */
-#define	XFS_LITINO(mp)	((mp)->m_litino)
+#define XFS_LITINO(mp) \
+	((int)(((mp)->m_sb.sb_inodesize) - sizeof(struct xfs_dinode)))
+
 #define	XFS_BROOT_SIZE_ADJ	\
 	(XFS_BTREE_LBLOCK_LEN - sizeof(xfs_bmdr_block_t))
 
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 1afb122..c657bec 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -46,8 +46,6 @@
 
 struct xfs_name xfs_name_dotdot = {"..", 2};
 
-extern const struct xfs_nameops xfs_default_nameops;
-
 /*
  * ASCII case-insensitive (ie. A-Z) support for directories that was
  * used in IRIX.
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index e1f0a06..ab52e9e 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -448,7 +448,6 @@ xfs_dir2_block_getdents(
 	xfs_mount_t		*mp;		/* filesystem mount point */
 	char			*ptr;		/* current data entry */
 	int			wantoff;	/* starting block offset */
-	xfs_ino_t		ino;
 	xfs_off_t		cook;
 
 	mp = dp->i_mount;
@@ -509,16 +508,12 @@ xfs_dir2_block_getdents(
 
 		cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 					    (char *)dep - (char *)block);
-		ino = be64_to_cpu(dep->inumber);
-#if XFS_BIG_INUMS
-		ino += mp->m_inoadd;
-#endif
 
 		/*
 		 * If it didn't fit, set the final offset to here & return.
 		 */
 		if (filldir(dirent, dep->name, dep->namelen, cook & 0x7fffffff,
-			    ino, DT_UNKNOWN)) {
+			    be64_to_cpu(dep->inumber), DT_UNKNOWN)) {
 			*offset = cook & 0x7fffffff;
 			xfs_da_brelse(NULL, bp);
 			return 0;
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
index b816e02..efbc290 100644
--- a/fs/xfs/xfs_dir2_data.h
+++ b/fs/xfs/xfs_dir2_data.h
@@ -38,7 +38,7 @@ struct xfs_trans;
 
 /*
  * Directory address space divided into sections,
- * spaces separated by 32gb.
+ * spaces separated by 32GB.
  */
 #define	XFS_DIR2_SPACE_SIZE	(1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
 #define	XFS_DIR2_DATA_SPACE	0
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index ef805a3..fa913e45 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -549,7 +549,7 @@ xfs_dir2_leaf_addname(
  * Check the internal consistency of a leaf1 block.
  * Pop an assert if something is wrong.
  */
-void
+STATIC void
 xfs_dir2_leaf_check(
 	xfs_inode_t		*dp,		/* incore directory inode */
 	xfs_dabuf_t		*bp)		/* leaf's buffer */
@@ -780,7 +780,6 @@ xfs_dir2_leaf_getdents(
 	int			ra_index;	/* *map index for read-ahead */
 	int			ra_offset;	/* map entry offset for ra */
 	int			ra_want;	/* readahead count wanted */
-	xfs_ino_t		ino;
 
 	/*
 	 * If the offset is at or past the largest allowed value,
@@ -1076,24 +1075,12 @@ xfs_dir2_leaf_getdents(
 			continue;
 		}
 
-		/*
-		 * Copy the entry into the putargs, and try formatting it.
-		 */
 		dep = (xfs_dir2_data_entry_t *)ptr;
-
 		length = xfs_dir2_data_entsize(dep->namelen);
 
-		ino = be64_to_cpu(dep->inumber);
-#if XFS_BIG_INUMS
-		ino += mp->m_inoadd;
-#endif
-
-		/*
-		 * Won't fit.  Return to caller.
-		 */
 		if (filldir(dirent, dep->name, dep->namelen,
 			    xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff,
-			    ino, DT_UNKNOWN))
+			    be64_to_cpu(dep->inumber), DT_UNKNOWN))
 			break;
 
 		/*
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index fa6c3a5..5a81ccd 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -1104,7 +1104,7 @@ xfs_dir2_leafn_remove(
 	}
 	xfs_dir2_leafn_check(dp, bp);
 	/*
-	 * Return indication of whether this leaf block is emtpy enough
+	 * Return indication of whether this leaf block is empty enough
 	 * to justify trying to join it with a neighbor.
 	 */
 	*rval =
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index a8a8a6e..e89734e 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -748,11 +748,7 @@ xfs_dir2_sf_getdents(
 	 * Put . entry unless we're starting past it.
 	 */
 	if (*offset <= dot_offset) {
-		ino = dp->i_ino;
-#if XFS_BIG_INUMS
-		ino += mp->m_inoadd;
-#endif
-		if (filldir(dirent, ".", 1, dot_offset & 0x7fffffff, ino, DT_DIR)) {
+		if (filldir(dirent, ".", 1, dot_offset & 0x7fffffff, dp->i_ino, DT_DIR)) {
 			*offset = dot_offset & 0x7fffffff;
 			return 0;
 		}
@@ -763,9 +759,6 @@ xfs_dir2_sf_getdents(
 	 */
 	if (*offset <= dotdot_offset) {
 		ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
-#if XFS_BIG_INUMS
-		ino += mp->m_inoadd;
-#endif
 		if (filldir(dirent, "..", 2, dotdot_offset & 0x7fffffff, ino, DT_DIR)) {
 			*offset = dotdot_offset & 0x7fffffff;
 			return 0;
@@ -786,10 +779,6 @@ xfs_dir2_sf_getdents(
 		}
 
 		ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
-#if XFS_BIG_INUMS
-		ino += mp->m_inoadd;
-#endif
-
 		if (filldir(dirent, sfep->name, sfep->namelen,
 			    off & 0x7fffffff, ino, DT_UNKNOWN)) {
 			*offset = off & 0x7fffffff;
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 2f049f6..0d22c56 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -33,12 +33,10 @@ typedef struct xfs_extent {
  * conversion routine.
  */
 
-#ifndef HAVE_FORMAT32
 typedef struct xfs_extent_32 {
 	__uint64_t	ext_start;
 	__uint32_t	ext_len;
 } __attribute__((packed)) xfs_extent_32_t;
-#endif
 
 typedef struct xfs_extent_64 {
 	__uint64_t	ext_start;
@@ -59,7 +57,6 @@ typedef struct xfs_efi_log_format {
 	xfs_extent_t		efi_extents[1];	/* array of extents to free */
 } xfs_efi_log_format_t;
 
-#ifndef HAVE_FORMAT32
 typedef struct xfs_efi_log_format_32 {
 	__uint16_t		efi_type;	/* efi log item type */
 	__uint16_t		efi_size;	/* size of this item */
@@ -67,7 +64,6 @@ typedef struct xfs_efi_log_format_32 {
 	__uint64_t		efi_id;		/* efi identifier */
 	xfs_extent_32_t		efi_extents[1];	/* array of extents to free */
 } __attribute__((packed)) xfs_efi_log_format_32_t;
-#endif
 
 typedef struct xfs_efi_log_format_64 {
 	__uint16_t		efi_type;	/* efi log item type */
@@ -90,7 +86,6 @@ typedef struct xfs_efd_log_format {
 	xfs_extent_t		efd_extents[1];	/* array of extents freed */
 } xfs_efd_log_format_t;
 
-#ifndef HAVE_FORMAT32
 typedef struct xfs_efd_log_format_32 {
 	__uint16_t		efd_type;	/* efd log item type */
 	__uint16_t		efd_size;	/* size of this item */
@@ -98,7 +93,6 @@ typedef struct xfs_efd_log_format_32 {
 	__uint64_t		efd_efi_id;	/* id of corresponding efi */
 	xfs_extent_32_t		efd_extents[1];	/* array of extents freed */
 } __attribute__((packed)) xfs_efd_log_format_32_t;
-#endif
 
 typedef struct xfs_efd_log_format_64 {
 	__uint16_t		efd_type;	/* efd log item type */
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index f3bb75d..6c87c8f 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -140,7 +140,7 @@ _xfs_filestream_pick_ag(
 	xfs_extlen_t	minlen)
 {
 	int		err, trylock, nscan;
-	xfs_extlen_t	delta, longest, need, free, minfree, maxfree = 0;
+	xfs_extlen_t	longest, free, minfree, maxfree = 0;
 	xfs_agnumber_t	ag, max_ag = NULLAGNUMBER;
 	struct xfs_perag *pag;
 
@@ -186,12 +186,7 @@ _xfs_filestream_pick_ag(
 			goto next_ag;
 		}
 
-		need = XFS_MIN_FREELIST_PAG(pag, mp);
-		delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
-		longest = (pag->pagf_longest > delta) ?
-		          (pag->pagf_longest - delta) :
-		          (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
-
+		longest = xfs_alloc_longest_free_extent(mp, pag);
 		if (((minlen && longest >= minlen) ||
 		     (!minlen && pag->pagf_freeblks >= minfree)) &&
 		    (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 680d0e0..8379e3b 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -576,7 +576,7 @@ out:
 	if (fdblks_delta) {
 		/*
 		 * If we are putting blocks back here, m_resblks_avail is
-		 * already at it's max so this will put it in the free pool.
+		 * already at its max so this will put it in the free pool.
 		 *
 		 * If we need space, we'll either succeed in getting it
 		 * from the free block count or we'll get an enospc. If
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index ab016e5..3120a3a 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -230,7 +230,7 @@ xfs_ialloc_ag_alloc(
 		args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1;
 
 		/* Allow space for the inode btree to split. */
-		args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
+		args.minleft = args.mp->m_in_maxlevels - 1;
 		if ((error = xfs_alloc_vextent(&args)))
 			return error;
 	} else
@@ -270,7 +270,7 @@ xfs_ialloc_ag_alloc(
 		/*
 		 * Allow space for the inode btree to split.
 		 */
-		args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
+		args.minleft = args.mp->m_in_maxlevels - 1;
 		if ((error = xfs_alloc_vextent(&args)))
 			return error;
 	}
@@ -349,7 +349,7 @@ xfs_ialloc_ag_alloc(
 		 * Initialize all inodes in this buffer and then log them.
 		 *
 		 * XXX: It would be much better if we had just one transaction to
-		 *      log a whole cluster of inodes instead of all the indivdual
+		 *      log a whole cluster of inodes instead of all the individual
 		 *      transactions causing a lot of log traffic.
 		 */
 		xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog);
@@ -943,7 +943,7 @@ nextag:
 	ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
 				   XFS_INODES_PER_CHUNK) == 0);
 	ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
-	XFS_INOBT_CLR_FREE(&rec, offset);
+	rec.ir_free &= ~XFS_INOBT_MASK(offset);
 	rec.ir_freecount--;
 	if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount,
 			rec.ir_free)))
@@ -1105,11 +1105,11 @@ xfs_difree(
 	 */
 	off = agino - rec.ir_startino;
 	ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK);
-	ASSERT(!XFS_INOBT_IS_FREE(&rec, off));
+	ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off)));
 	/*
 	 * Mark the inode free & increment the count.
 	 */
-	XFS_INOBT_SET_FREE(&rec, off);
+	rec.ir_free |= XFS_INOBT_MASK(off);
 	rec.ir_freecount++;
 
 	/*
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 99f2408..c282a9a 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -164,7 +164,7 @@ xfs_inobt_init_rec_from_cur(
 }
 
 /*
- * intial value of ptr for lookup
+ * initial value of ptr for lookup
  */
 STATIC void
 xfs_inobt_init_ptr_from_cur(
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index 5580e25..f782ad0 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -32,14 +32,14 @@ struct xfs_mount;
 #define	XFS_IBT_MAGIC	0x49414254	/* 'IABT' */
 
 typedef	__uint64_t	xfs_inofree_t;
-#define	XFS_INODES_PER_CHUNK	(NBBY * sizeof(xfs_inofree_t))
+#define	XFS_INODES_PER_CHUNK		(NBBY * sizeof(xfs_inofree_t))
 #define	XFS_INODES_PER_CHUNK_LOG	(XFS_NBBYLOG + 3)
-#define	XFS_INOBT_ALL_FREE	((xfs_inofree_t)-1)
+#define	XFS_INOBT_ALL_FREE		((xfs_inofree_t)-1)
+#define	XFS_INOBT_MASK(i)		((xfs_inofree_t)1 << (i))
 
 static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
 {
-	return (((n) >= XFS_INODES_PER_CHUNK ? \
-		(xfs_inofree_t)0 : ((xfs_inofree_t)1 << (n))) - 1) << (i);
+	return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i;
 }
 
 /*
@@ -69,20 +69,6 @@ typedef struct xfs_inobt_key {
 typedef __be32 xfs_inobt_ptr_t;
 
 /*
- * Bit manipulations for ir_free.
- */
-#define	XFS_INOBT_MASK(i)		((xfs_inofree_t)1 << (i))
-#define	XFS_INOBT_IS_FREE(rp,i)		\
-		(((rp)->ir_free & XFS_INOBT_MASK(i)) != 0)
-#define	XFS_INOBT_SET_FREE(rp,i)	((rp)->ir_free |= XFS_INOBT_MASK(i))
-#define	XFS_INOBT_CLR_FREE(rp,i)	((rp)->ir_free &= ~XFS_INOBT_MASK(i))
-
-/*
- * Maximum number of inode btree levels.
- */
-#define	XFS_IN_MAXLEVELS(mp)		((mp)->m_in_maxlevels)
-
-/*
  * block numbers in the AG.
  */
 #define	XFS_IBT_BLOCK(mp)		((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1))
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 1f175fa..f879c1b 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -122,7 +122,7 @@ typedef struct xfs_ictimestamp {
 
 /*
  * NOTE:  This structure must be kept identical to struct xfs_dinode
- * 	  in xfs_dinode.h except for the endianess annotations.
+ * 	  in xfs_dinode.h except for the endianness annotations.
  */
 typedef struct xfs_icdinode {
 	__uint16_t	di_magic;	/* inode magic # = XFS_DINODE_MAGIC */
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 9957d06..a52ac12 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -40,7 +40,6 @@ typedef struct xfs_inode_log_format {
 	__int32_t		ilf_boffset;	/* off of inode in buffer */
 } xfs_inode_log_format_t;
 
-#ifndef HAVE_FORMAT32
 typedef struct xfs_inode_log_format_32 {
 	__uint16_t		ilf_type;	/* inode log item type */
 	__uint16_t		ilf_size;	/* size of this item */
@@ -56,7 +55,6 @@ typedef struct xfs_inode_log_format_32 {
 	__int32_t		ilf_len;	/* len of inode buffer */
 	__int32_t		ilf_boffset;	/* off of inode in buffer */
 } __attribute__((packed)) xfs_inode_log_format_32_t;
-#endif
 
 typedef struct xfs_inode_log_format_64 {
 	__uint16_t		ilf_type;	/* inode log item type */
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index ee1a0c1..a1cc132 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -63,7 +63,7 @@ typedef enum {
  */
 
 typedef struct xfs_iomap {
-	xfs_daddr_t		iomap_bn;	/* first 512b blk of mapping */
+	xfs_daddr_t		iomap_bn;	/* first 512B blk of mapping */
 	xfs_buftarg_t		*iomap_target;
 	xfs_off_t		iomap_offset;	/* offset of mapping, bytes */
 	xfs_off_t		iomap_bsize;	/* size of mapping, bytes */
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index cf98a80..aeb2d22 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -83,7 +83,12 @@ xfs_bulkstat_one_iget(
 	buf->bs_uid = dic->di_uid;
 	buf->bs_gid = dic->di_gid;
 	buf->bs_size = dic->di_size;
-	vn_atime_to_bstime(VFS_I(ip), &buf->bs_atime);
+	/*
+	 * We are reading the atime from the Linux inode because the
+	 * dinode might not be up to date.
+	 */
+	buf->bs_atime.tv_sec = VFS_I(ip)->i_atime.tv_sec;
+	buf->bs_atime.tv_nsec = VFS_I(ip)->i_atime.tv_nsec;
 	buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
 	buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
 	buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
@@ -579,7 +584,7 @@ xfs_bulkstat(
 				 * first inode of the cluster.
 				 *
 				 * Careful with clustidx.   There can be
-				 * multple clusters per chunk, a single
+				 * multiple clusters per chunk, a single
 				 * cluster per chunk or a cluster that has
 				 * inodes represented from several different
 				 * chunks (if blocksize is large).
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index f4726f7..f76c6d7 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -574,7 +574,7 @@ xfs_log_mount(
 	error = xfs_trans_ail_init(mp);
 	if (error) {
 		cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error);
-		goto error;
+		goto out_free_log;
 	}
 	mp->m_log->l_ailp = mp->m_ail;
 
@@ -594,20 +594,22 @@ xfs_log_mount(
 			mp->m_flags |= XFS_MOUNT_RDONLY;
 		if (error) {
 			cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error);
-			goto error;
+			goto out_destroy_ail;
 		}
 	}
 
 	/* Normal transactions can now occur */
 	mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY;
 
-	/* End mounting message in xfs_log_mount_finish */
 	return 0;
-error:
-	xfs_log_unmount_dealloc(mp);
+
+out_destroy_ail:
+	xfs_trans_ail_destroy(mp);
+out_free_log:
+	xlog_dealloc_log(mp->m_log);
 out:
 	return error;
-}	/* xfs_log_mount */
+}
 
 /*
  * Finish the recovery of the file system.  This is separate from
@@ -633,19 +635,6 @@ xfs_log_mount_finish(xfs_mount_t *mp)
 }
 
 /*
- * Unmount processing for the log.
- */
-int
-xfs_log_unmount(xfs_mount_t *mp)
-{
-	int		error;
-
-	error = xfs_log_unmount_write(mp);
-	xfs_log_unmount_dealloc(mp);
-	return error;
-}
-
-/*
  * Final log writes as part of unmount.
  *
  * Mark the filesystem clean as unmount happens.  Note that during relocation
@@ -795,7 +784,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
  * and deallocate the log as the aild references the log.
  */
 void
-xfs_log_unmount_dealloc(xfs_mount_t *mp)
+xfs_log_unmount(xfs_mount_t *mp)
 {
 	xfs_trans_ail_destroy(mp);
 	xlog_dealloc_log(mp->m_log);
@@ -1109,7 +1098,7 @@ xlog_bdstrat_cb(struct xfs_buf *bp)
 /*
  * Return size of each in-core log record buffer.
  *
- * All machines get 8 x 32KB buffers by default, unless tuned otherwise.
+ * All machines get 8 x 32kB buffers by default, unless tuned otherwise.
  *
  * If the filesystem blocksize is too large, we may need to choose a
  * larger size since the directory code currently logs entire blocks.
@@ -1139,8 +1128,8 @@ xlog_get_iclog_buffer_size(xfs_mount_t	*mp,
 		}
 
 		if (xfs_sb_version_haslogv2(&mp->m_sb)) {
-			/* # headers = size / 32K
-			 * one header holds cycles from 32K of data
+			/* # headers = size / 32k
+			 * one header holds cycles from 32k of data
 			 */
 
 			xhdrs = mp->m_logbsize / XLOG_HEADER_CYCLE_SIZE;
@@ -1156,7 +1145,7 @@ xlog_get_iclog_buffer_size(xfs_mount_t	*mp,
 		goto done;
 	}
 
-	/* All machines use 32KB buffers by default. */
+	/* All machines use 32kB buffers by default. */
 	log->l_iclog_size = XLOG_BIG_RECORD_BSIZE;
 	log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT;
 
@@ -1164,32 +1153,8 @@ xlog_get_iclog_buffer_size(xfs_mount_t	*mp,
 	log->l_iclog_hsize = BBSIZE;
 	log->l_iclog_heads = 1;
 
-	/*
-	 * For 16KB, we use 3 32KB buffers.  For 32KB block sizes, we use
-	 * 4 32KB buffers.  For 64KB block sizes, we use 8 32KB buffers.
-	 */
-	if (mp->m_sb.sb_blocksize >= 16*1024) {
-		log->l_iclog_size = XLOG_BIG_RECORD_BSIZE;
-		log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT;
-		if (mp->m_logbufs <= 0) {
-			switch (mp->m_sb.sb_blocksize) {
-			    case 16*1024:			/* 16 KB */
-				log->l_iclog_bufs = 3;
-				break;
-			    case 32*1024:			/* 32 KB */
-				log->l_iclog_bufs = 4;
-				break;
-			    case 64*1024:			/* 64 KB */
-				log->l_iclog_bufs = 8;
-				break;
-			    default:
-				xlog_panic("XFS: Invalid blocksize");
-				break;
-			}
-		}
-	}
-
-done:	/* are we being asked to make the sizes selected above visible? */
+done:
+	/* are we being asked to make the sizes selected above visible? */
 	if (mp->m_logbufs == 0)
 		mp->m_logbufs = log->l_iclog_bufs;
 	if (mp->m_logbsize == 0)
@@ -3214,7 +3179,7 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
  */
 
 /*
- * Free a used ticket when it's refcount falls to zero.
+ * Free a used ticket when its refcount falls to zero.
  */
 void
 xfs_log_ticket_put(
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 8a3e84e..d0c9baa 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -170,9 +170,8 @@ int	  xfs_log_write(struct xfs_mount *mp,
 			int		 nentries,
 			xfs_log_ticket_t ticket,
 			xfs_lsn_t	 *start_lsn);
-int	  xfs_log_unmount(struct xfs_mount *mp);
 int	  xfs_log_unmount_write(struct xfs_mount *mp);
-void      xfs_log_unmount_dealloc(struct xfs_mount *mp);
+void      xfs_log_unmount(struct xfs_mount *mp);
 int	  xfs_log_force_umount(struct xfs_mount *mp, int logerror);
 int	  xfs_log_need_covered(struct xfs_mount *mp);
 
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 654167b..bcad5f4 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -359,7 +359,7 @@ typedef struct xlog_in_core {
 	int			ic_size;
 	int			ic_offset;
 	int			ic_bwritecnt;
-	ushort_t		ic_state;
+	unsigned short		ic_state;
 	char			*ic_datap;	/* pointer to iclog data */
 #ifdef XFS_LOG_TRACE
 	struct ktrace		*ic_trace;
@@ -455,7 +455,6 @@ extern void	 xlog_recover_process_iunlinks(xlog_t *log);
 
 extern struct xfs_buf *xlog_get_bp(xlog_t *, int);
 extern void	 xlog_put_bp(struct xfs_buf *);
-extern int	 xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *);
 
 extern kmem_zone_t	*xfs_log_ticket_zone;
 
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 61af610..7ba4501 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -94,12 +94,30 @@ xlog_put_bp(
 	xfs_buf_free(bp);
 }
 
+STATIC xfs_caddr_t		/* bp's data pointer, possibly advanced */
+xlog_align(
+	xlog_t		*log,		/* supplies l_sectbb_log, l_sectbb_mask */
+	xfs_daddr_t	blk_no,		/* block number the offset comes from */
+	int		nbblks,		/* block count; only used by the ASSERT */
+	xfs_buf_t	*bp)		/* buffer whose pointer is returned */
+{
+	xfs_caddr_t	ptr;
+
+	if (!log->l_sectbb_log)		/* nothing to adjust */
+		return XFS_BUF_PTR(bp);
+	/* Advance by BBTOB() of the blk_no bits selected by l_sectbb_mask. */
+	ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask);
+	ASSERT(XFS_BUF_SIZE(bp) >=	/* offset + nbblks must fit in bp */
+		BBTOB(nbblks + (blk_no & log->l_sectbb_mask)));
+	return ptr;
+}
+
 
 /*
  * nbblks should be uint, but oh well.  Just want to catch that 32-bit length.
  */
-int
-xlog_bread(
+STATIC int
+xlog_bread_noalign(
 	xlog_t		*log,
 	xfs_daddr_t	blk_no,
 	int		nbblks,
@@ -137,6 +155,24 @@ xlog_bread(
 	return error;
 }
 
+STATIC int			/* 0, or xlog_bread_noalign()'s error */
+xlog_bread(
+	xlog_t		*log,
+	xfs_daddr_t	blk_no,		/* passed to both helpers below */
+	int		nbblks,		/* passed to both helpers below */
+	xfs_buf_t	*bp,		/* passed to both helpers below */
+	xfs_caddr_t	*offset)	/* out: written only on success */
+{
+	int		error;
+
+	error = xlog_bread_noalign(log, blk_no, nbblks, bp);
+	if (error)
+		return error;		/* *offset is left untouched */
+	/* Success: hand back the pointer xlog_align() computes for this read. */
+	*offset = xlog_align(log, blk_no, nbblks, bp);
+	return 0;
+}
+
 /*
  * Write out the buffer at the given block for the given number of blocks.
  * The buffer is kept locked across the write and is returned locked.
@@ -180,24 +216,6 @@ xlog_bwrite(
 	return error;
 }
 
-STATIC xfs_caddr_t
-xlog_align(
-	xlog_t		*log,
-	xfs_daddr_t	blk_no,
-	int		nbblks,
-	xfs_buf_t	*bp)
-{
-	xfs_caddr_t	ptr;
-
-	if (!log->l_sectbb_log)
-		return XFS_BUF_PTR(bp);
-
-	ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask);
-	ASSERT(XFS_BUF_SIZE(bp) >=
-		BBTOB(nbblks + (blk_no & log->l_sectbb_mask)));
-	return ptr;
-}
-
 #ifdef DEBUG
 /*
  * dump debug superblock and log record information
@@ -211,11 +229,11 @@ xlog_header_check_dump(
 
 	cmn_err(CE_DEBUG, "%s:  SB : uuid = ", __func__);
 	for (b = 0; b < 16; b++)
-		cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]);
+		cmn_err(CE_DEBUG, "%02x", ((__uint8_t *)&mp->m_sb.sb_uuid)[b]);
 	cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT);
 	cmn_err(CE_DEBUG, "    log : uuid = ");
 	for (b = 0; b < 16; b++)
-		cmn_err(CE_DEBUG, "%02x",((uchar_t *)&head->h_fs_uuid)[b]);
+		cmn_err(CE_DEBUG, "%02x", ((__uint8_t *)&head->h_fs_uuid)[b]);
 	cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt));
 }
 #else
@@ -321,9 +339,9 @@ xlog_find_cycle_start(
 
 	mid_blk = BLK_AVG(first_blk, *last_blk);
 	while (mid_blk != first_blk && mid_blk != *last_blk) {
-		if ((error = xlog_bread(log, mid_blk, 1, bp)))
+		error = xlog_bread(log, mid_blk, 1, bp, &offset);
+		if (error)
 			return error;
-		offset = xlog_align(log, mid_blk, 1, bp);
 		mid_cycle = xlog_get_cycle(offset);
 		if (mid_cycle == cycle) {
 			*last_blk = mid_blk;
@@ -379,10 +397,10 @@ xlog_find_verify_cycle(
 
 		bcount = min(bufblks, (start_blk + nbblks - i));
 
-		if ((error = xlog_bread(log, i, bcount, bp)))
+		error = xlog_bread(log, i, bcount, bp, &buf);
+		if (error)
 			goto out;
 
-		buf = xlog_align(log, i, bcount, bp);
 		for (j = 0; j < bcount; j++) {
 			cycle = xlog_get_cycle(buf);
 			if (cycle == stop_on_cycle_no) {
@@ -436,9 +454,9 @@ xlog_find_verify_log_record(
 			return ENOMEM;
 		smallmem = 1;
 	} else {
-		if ((error = xlog_bread(log, start_blk, num_blks, bp)))
+		error = xlog_bread(log, start_blk, num_blks, bp, &offset);
+		if (error)
 			goto out;
-		offset = xlog_align(log, start_blk, num_blks, bp);
 		offset += ((num_blks - 1) << BBSHIFT);
 	}
 
@@ -453,9 +471,9 @@ xlog_find_verify_log_record(
 		}
 
 		if (smallmem) {
-			if ((error = xlog_bread(log, i, 1, bp)))
+			error = xlog_bread(log, i, 1, bp, &offset);
+			if (error)
 				goto out;
-			offset = xlog_align(log, i, 1, bp);
 		}
 
 		head = (xlog_rec_header_t *)offset;
@@ -559,15 +577,18 @@ xlog_find_head(
 	bp = xlog_get_bp(log, 1);
 	if (!bp)
 		return ENOMEM;
-	if ((error = xlog_bread(log, 0, 1, bp)))
+
+	error = xlog_bread(log, 0, 1, bp, &offset);
+	if (error)
 		goto bp_err;
-	offset = xlog_align(log, 0, 1, bp);
+
 	first_half_cycle = xlog_get_cycle(offset);
 
 	last_blk = head_blk = log_bbnum - 1;	/* get cycle # of last block */
-	if ((error = xlog_bread(log, last_blk, 1, bp)))
+	error = xlog_bread(log, last_blk, 1, bp, &offset);
+	if (error)
 		goto bp_err;
-	offset = xlog_align(log, last_blk, 1, bp);
+
 	last_half_cycle = xlog_get_cycle(offset);
 	ASSERT(last_half_cycle != 0);
 
@@ -817,9 +838,10 @@ xlog_find_tail(
 	if (!bp)
 		return ENOMEM;
 	if (*head_blk == 0) {				/* special case */
-		if ((error = xlog_bread(log, 0, 1, bp)))
+		error = xlog_bread(log, 0, 1, bp, &offset);
+		if (error)
 			goto bread_err;
-		offset = xlog_align(log, 0, 1, bp);
+
 		if (xlog_get_cycle(offset) == 0) {
 			*tail_blk = 0;
 			/* leave all other log inited values alone */
@@ -832,9 +854,10 @@ xlog_find_tail(
 	 */
 	ASSERT(*head_blk < INT_MAX);
 	for (i = (int)(*head_blk) - 1; i >= 0; i--) {
-		if ((error = xlog_bread(log, i, 1, bp)))
+		error = xlog_bread(log, i, 1, bp, &offset);
+		if (error)
 			goto bread_err;
-		offset = xlog_align(log, i, 1, bp);
+
 		if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) {
 			found = 1;
 			break;
@@ -848,9 +871,10 @@ xlog_find_tail(
 	 */
 	if (!found) {
 		for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
-			if ((error = xlog_bread(log, i, 1, bp)))
+			error = xlog_bread(log, i, 1, bp, &offset);
+			if (error)
 				goto bread_err;
-			offset = xlog_align(log, i, 1, bp);
+
 			if (XLOG_HEADER_MAGIC_NUM ==
 			    be32_to_cpu(*(__be32 *)offset)) {
 				found = 2;
@@ -922,10 +946,10 @@ xlog_find_tail(
 	if (*head_blk == after_umount_blk &&
 	    be32_to_cpu(rhead->h_num_logops) == 1) {
 		umount_data_blk = (i + hblks) % log->l_logBBsize;
-		if ((error = xlog_bread(log, umount_data_blk, 1, bp))) {
+		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+		if (error)
 			goto bread_err;
-		}
-		offset = xlog_align(log, umount_data_blk, 1, bp);
+
 		op_head = (xlog_op_header_t *)offset;
 		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
 			/*
@@ -1017,9 +1041,10 @@ xlog_find_zeroed(
 	bp = xlog_get_bp(log, 1);
 	if (!bp)
 		return ENOMEM;
-	if ((error = xlog_bread(log, 0, 1, bp)))
+	error = xlog_bread(log, 0, 1, bp, &offset);
+	if (error)
 		goto bp_err;
-	offset = xlog_align(log, 0, 1, bp);
+
 	first_cycle = xlog_get_cycle(offset);
 	if (first_cycle == 0) {		/* completely zeroed log */
 		*blk_no = 0;
@@ -1028,9 +1053,10 @@ xlog_find_zeroed(
 	}
 
 	/* check partially zeroed log */
-	if ((error = xlog_bread(log, log_bbnum-1, 1, bp)))
+	error = xlog_bread(log, log_bbnum-1, 1, bp, &offset);
+	if (error)
 		goto bp_err;
-	offset = xlog_align(log, log_bbnum-1, 1, bp);
+
 	last_cycle = xlog_get_cycle(offset);
 	if (last_cycle != 0) {		/* log completely written to */
 		xlog_put_bp(bp);
@@ -1152,10 +1178,10 @@ xlog_write_log_records(
 	 */
 	balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block);
 	if (balign != start_block) {
-		if ((error = xlog_bread(log, start_block, 1, bp))) {
-			xlog_put_bp(bp);
-			return error;
-		}
+		error = xlog_bread_noalign(log, start_block, 1, bp);
+		if (error)
+			goto out_put_bp;
+
 		j = start_block - balign;
 	}
 
@@ -1175,10 +1201,14 @@ xlog_write_log_records(
 			balign = BBTOB(ealign - start_block);
 			error = XFS_BUF_SET_PTR(bp, offset + balign,
 						BBTOB(sectbb));
-			if (!error)
-				error = xlog_bread(log, ealign, sectbb, bp);
-			if (!error)
-				error = XFS_BUF_SET_PTR(bp, offset, bufblks);
+			if (error)
+				break;
+
+			error = xlog_bread_noalign(log, ealign, sectbb, bp);
+			if (error)
+				break;
+
+			error = XFS_BUF_SET_PTR(bp, offset, bufblks);
 			if (error)
 				break;
 		}
@@ -1195,6 +1225,8 @@ xlog_write_log_records(
 		start_block += endcount;
 		j = 0;
 	}
+
+ out_put_bp:
 	xlog_put_bp(bp);
 	return error;
 }
@@ -2511,16 +2543,10 @@ xlog_recover_do_inode_trans(
 	}
 
 write_inode_buffer:
-	if (ITEM_TYPE(item) == XFS_LI_INODE) {
-		ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
-		bp->b_mount = mp;
-		XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
-		xfs_bdwrite(mp, bp);
-	} else {
-		XFS_BUF_STALE(bp);
-		error = xfs_bwrite(mp, bp);
-	}
-
+	ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
+	bp->b_mount = mp;
+	XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
+	xfs_bdwrite(mp, bp);
 error:
 	if (need_free)
 		kmem_free(in_f);
@@ -2769,51 +2795,48 @@ xlog_recover_do_trans(
 	int			error = 0;
 	xlog_recover_item_t	*item, *first_item;
 
-	if ((error = xlog_recover_reorder_trans(trans)))
+	error = xlog_recover_reorder_trans(trans);
+	if (error)
 		return error;
+
 	first_item = item = trans->r_itemq;
 	do {
-		/*
-		 * we don't need to worry about the block number being
-		 * truncated in > 1 TB buffers because in user-land,
-		 * we're now n32 or 64-bit so xfs_daddr_t is 64-bits so
-		 * the blknos will get through the user-mode buffer
-		 * cache properly.  The only bad case is o32 kernels
-		 * where xfs_daddr_t is 32-bits but mount will warn us
-		 * off a > 1 TB filesystem before we get here.
-		 */
-		if ((ITEM_TYPE(item) == XFS_LI_BUF)) {
-			if  ((error = xlog_recover_do_buffer_trans(log, item,
-								 pass)))
-				break;
-		} else if ((ITEM_TYPE(item) == XFS_LI_INODE)) {
-			if ((error = xlog_recover_do_inode_trans(log, item,
-								pass)))
-				break;
-		} else if (ITEM_TYPE(item) == XFS_LI_EFI) {
-			if ((error = xlog_recover_do_efi_trans(log, item, trans->r_lsn,
-						  pass)))
-				break;
-		} else if (ITEM_TYPE(item) == XFS_LI_EFD) {
+		switch (ITEM_TYPE(item)) {
+		case XFS_LI_BUF:
+			error = xlog_recover_do_buffer_trans(log, item, pass);
+			break;
+		case XFS_LI_INODE:
+			error = xlog_recover_do_inode_trans(log, item, pass);
+			break;
+		case XFS_LI_EFI:
+			error = xlog_recover_do_efi_trans(log, item,
+							  trans->r_lsn, pass);
+			break;
+		case XFS_LI_EFD:
 			xlog_recover_do_efd_trans(log, item, pass);
-		} else if (ITEM_TYPE(item) == XFS_LI_DQUOT) {
-			if ((error = xlog_recover_do_dquot_trans(log, item,
-								   pass)))
-					break;
-		} else if ((ITEM_TYPE(item) == XFS_LI_QUOTAOFF)) {
-			if ((error = xlog_recover_do_quotaoff_trans(log, item,
-								   pass)))
-					break;
-		} else {
-			xlog_warn("XFS: xlog_recover_do_trans");
+			error = 0;
+			break;
+		case XFS_LI_DQUOT:
+			error = xlog_recover_do_dquot_trans(log, item, pass);
+			break;
+		case XFS_LI_QUOTAOFF:
+			error = xlog_recover_do_quotaoff_trans(log, item,
+							       pass);
+			break;
+		default:
+			xlog_warn(
+	"XFS: invalid item type (%d) xlog_recover_do_trans", ITEM_TYPE(item));
 			ASSERT(0);
 			error = XFS_ERROR(EIO);
 			break;
 		}
+
+		if (error)
+			return error;
 		item = item->ri_next;
 	} while (first_item != item);
 
-	return error;
+	return 0;
 }
 
 /*
@@ -3490,9 +3513,11 @@ xlog_do_recovery_pass(
 		hbp = xlog_get_bp(log, 1);
 		if (!hbp)
 			return ENOMEM;
-		if ((error = xlog_bread(log, tail_blk, 1, hbp)))
+
+		error = xlog_bread(log, tail_blk, 1, hbp, &offset);
+		if (error)
 			goto bread_err1;
-		offset = xlog_align(log, tail_blk, 1, hbp);
+
 		rhead = (xlog_rec_header_t *)offset;
 		error = xlog_valid_rec_header(log, rhead, tail_blk);
 		if (error)
@@ -3526,9 +3551,10 @@ xlog_do_recovery_pass(
 	memset(rhash, 0, sizeof(rhash));
 	if (tail_blk <= head_blk) {
 		for (blk_no = tail_blk; blk_no < head_blk; ) {
-			if ((error = xlog_bread(log, blk_no, hblks, hbp)))
+			error = xlog_bread(log, blk_no, hblks, hbp, &offset);
+			if (error)
 				goto bread_err2;
-			offset = xlog_align(log, blk_no, hblks, hbp);
+
 			rhead = (xlog_rec_header_t *)offset;
 			error = xlog_valid_rec_header(log, rhead, blk_no);
 			if (error)
@@ -3536,10 +3562,11 @@ xlog_do_recovery_pass(
 
 			/* blocks in data section */
 			bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
-			error = xlog_bread(log, blk_no + hblks, bblks, dbp);
+			error = xlog_bread(log, blk_no + hblks, bblks, dbp,
+					   &offset);
 			if (error)
 				goto bread_err2;
-			offset = xlog_align(log, blk_no + hblks, bblks, dbp);
+
 			xlog_unpack_data(rhead, offset, log);
 			if ((error = xlog_recover_process_data(log,
 						rhash, rhead, offset, pass)))
@@ -3562,10 +3589,10 @@ xlog_do_recovery_pass(
 			wrapped_hblks = 0;
 			if (blk_no + hblks <= log->l_logBBsize) {
 				/* Read header in one read */
-				error = xlog_bread(log, blk_no, hblks, hbp);
+				error = xlog_bread(log, blk_no, hblks, hbp,
+						   &offset);
 				if (error)
 					goto bread_err2;
-				offset = xlog_align(log, blk_no, hblks, hbp);
 			} else {
 				/* This LR is split across physical log end */
 				if (blk_no != log->l_logBBsize) {
@@ -3573,12 +3600,13 @@ xlog_do_recovery_pass(
 					ASSERT(blk_no <= INT_MAX);
 					split_hblks = log->l_logBBsize - (int)blk_no;
 					ASSERT(split_hblks > 0);
-					if ((error = xlog_bread(log, blk_no,
-							split_hblks, hbp)))
+					error = xlog_bread(log, blk_no,
+							   split_hblks, hbp,
+							   &offset);
+					if (error)
 						goto bread_err2;
-					offset = xlog_align(log, blk_no,
-							split_hblks, hbp);
 				}
+
 				/*
 				 * Note: this black magic still works with
 				 * large sector sizes (non-512) only because:
@@ -3596,14 +3624,19 @@ xlog_do_recovery_pass(
 				error = XFS_BUF_SET_PTR(hbp,
 						bufaddr + BBTOB(split_hblks),
 						BBTOB(hblks - split_hblks));
-				if (!error)
-					error = xlog_bread(log, 0,
-							wrapped_hblks, hbp);
-				if (!error)
-					error = XFS_BUF_SET_PTR(hbp, bufaddr,
+				if (error)
+					goto bread_err2;
+
+				error = xlog_bread_noalign(log, 0,
+							   wrapped_hblks, hbp);
+				if (error)
+					goto bread_err2;
+
+				error = XFS_BUF_SET_PTR(hbp, bufaddr,
 							BBTOB(hblks));
 				if (error)
 					goto bread_err2;
+
 				if (!offset)
 					offset = xlog_align(log, 0,
 							wrapped_hblks, hbp);
@@ -3619,10 +3652,10 @@ xlog_do_recovery_pass(
 
 			/* Read in data for log record */
 			if (blk_no + bblks <= log->l_logBBsize) {
-				error = xlog_bread(log, blk_no, bblks, dbp);
+				error = xlog_bread(log, blk_no, bblks, dbp,
+						   &offset);
 				if (error)
 					goto bread_err2;
-				offset = xlog_align(log, blk_no, bblks, dbp);
 			} else {
 				/* This log record is split across the
 				 * physical end of log */
@@ -3636,12 +3669,13 @@ xlog_do_recovery_pass(
 					split_bblks =
 						log->l_logBBsize - (int)blk_no;
 					ASSERT(split_bblks > 0);
-					if ((error = xlog_bread(log, blk_no,
-							split_bblks, dbp)))
+					error = xlog_bread(log, blk_no,
+							split_bblks, dbp,
+							&offset);
+					if (error)
 						goto bread_err2;
-					offset = xlog_align(log, blk_no,
-							split_bblks, dbp);
 				}
+
 				/*
 				 * Note: this black magic still works with
 				 * large sector sizes (non-512) only because:
@@ -3658,15 +3692,19 @@ xlog_do_recovery_pass(
 				error = XFS_BUF_SET_PTR(dbp,
 						bufaddr + BBTOB(split_bblks),
 						BBTOB(bblks - split_bblks));
-				if (!error)
-					error = xlog_bread(log, wrapped_hblks,
-							bblks - split_bblks,
-							dbp);
-				if (!error)
-					error = XFS_BUF_SET_PTR(dbp, bufaddr,
-							h_size);
 				if (error)
 					goto bread_err2;
+
+				error = xlog_bread_noalign(log, wrapped_hblks,
+						bblks - split_bblks,
+						dbp);
+				if (error)
+					goto bread_err2;
+
+				error = XFS_BUF_SET_PTR(dbp, bufaddr, h_size);
+				if (error)
+					goto bread_err2;
+
 				if (!offset)
 					offset = xlog_align(log, wrapped_hblks,
 						bblks - split_bblks, dbp);
@@ -3683,17 +3721,21 @@ xlog_do_recovery_pass(
 
 		/* read first part of physical log */
 		while (blk_no < head_blk) {
-			if ((error = xlog_bread(log, blk_no, hblks, hbp)))
+			error = xlog_bread(log, blk_no, hblks, hbp, &offset);
+			if (error)
 				goto bread_err2;
-			offset = xlog_align(log, blk_no, hblks, hbp);
+
 			rhead = (xlog_rec_header_t *)offset;
 			error = xlog_valid_rec_header(log, rhead, blk_no);
 			if (error)
 				goto bread_err2;
+
 			bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
-			if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
+			error = xlog_bread(log, blk_no+hblks, bblks, dbp,
+					   &offset);
+			if (error)
 				goto bread_err2;
-			offset = xlog_align(log, blk_no+hblks, bblks, dbp);
+
 			xlog_unpack_data(rhead, offset, log);
 			if ((error = xlog_recover_process_data(log, rhash,
 							rhead, offset, pass)))
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 3530025..b101990 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -45,7 +45,6 @@
 #include "xfs_fsops.h"
 #include "xfs_utils.h"
 
-STATIC int	xfs_uuid_mount(xfs_mount_t *);
 STATIC void	xfs_unmountfs_wait(xfs_mount_t *);
 
 
@@ -121,6 +120,84 @@ static const struct {
     { sizeof(xfs_sb_t),			 0 }
 };
 
+static DEFINE_MUTEX(xfs_uuid_table_mutex);
+static int xfs_uuid_table_size;
+static uuid_t *xfs_uuid_table;
+
+/*
+ * See if the UUID is unique among mounted XFS filesystems.
+ * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
+ */
+STATIC int
+xfs_uuid_mount(
+	struct xfs_mount	*mp)
+{
+	uuid_t			*uuid = &mp->m_sb.sb_uuid;
+	int			hole, i;
+
+	if (mp->m_flags & XFS_MOUNT_NOUUID)
+		return 0;
+
+	if (uuid_is_nil(uuid)) {
+		cmn_err(CE_WARN,
+			"XFS: Filesystem %s has nil UUID - can't mount",
+			mp->m_fsname);
+		return XFS_ERROR(EINVAL);
+	}
+
+	mutex_lock(&xfs_uuid_table_mutex);
+	for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
+		if (uuid_is_nil(&xfs_uuid_table[i])) {
+			hole = i;
+			continue;
+		}
+		if (uuid_equal(uuid, &xfs_uuid_table[i]))
+			goto out_duplicate;
+	}
+
+	if (hole < 0) {
+		xfs_uuid_table = kmem_realloc(xfs_uuid_table,
+			(xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
+			xfs_uuid_table_size  * sizeof(*xfs_uuid_table),
+			KM_SLEEP);
+		hole = xfs_uuid_table_size++;
+	}
+	xfs_uuid_table[hole] = *uuid;
+	mutex_unlock(&xfs_uuid_table_mutex);
+
+	return 0;
+
+ out_duplicate:
+	mutex_unlock(&xfs_uuid_table_mutex);
+	cmn_err(CE_WARN, "XFS: Filesystem %s has duplicate UUID - can't mount",
+			 mp->m_fsname);
+	return XFS_ERROR(EINVAL);
+}
+
+STATIC void
+xfs_uuid_unmount(
+	struct xfs_mount	*mp)
+{
+	uuid_t			*uuid = &mp->m_sb.sb_uuid;
+	int			i;
+
+	if (mp->m_flags & XFS_MOUNT_NOUUID)
+		return;
+
+	mutex_lock(&xfs_uuid_table_mutex);
+	for (i = 0; i < xfs_uuid_table_size; i++) {
+		if (uuid_is_nil(&xfs_uuid_table[i]))
+			continue;
+		if (!uuid_equal(uuid, &xfs_uuid_table[i]))
+			continue;
+		memset(&xfs_uuid_table[i], 0, sizeof(uuid_t));
+		break;
+	}
+	ASSERT(i < xfs_uuid_table_size);
+	mutex_unlock(&xfs_uuid_table_mutex);
+}
+
+
 /*
  * Free up the resources associated with a mount structure.  Assume that
  * the structure was initially zeroed, so we can tell which fields got
@@ -256,6 +333,22 @@ xfs_mount_validate_sb(
 		return XFS_ERROR(ENOSYS);
 	}
 
+	/*
+	 * Currently only very few inode sizes are supported.
+	 */
+	switch (sbp->sb_inodesize) {
+	case 256:
+	case 512:
+	case 1024:
+	case 2048:
+		break;
+	default:
+		xfs_fs_mount_cmn_err(flags,
+			"inode size of %d bytes not supported",
+			sbp->sb_inodesize);
+		return XFS_ERROR(ENOSYS);
+	}
+
 	if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
 	    xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
 		xfs_fs_mount_cmn_err(flags,
@@ -574,32 +667,10 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
 	mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
 	mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
 	mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
-	mp->m_litino = sbp->sb_inodesize - sizeof(struct xfs_dinode);
 	mp->m_blockmask = sbp->sb_blocksize - 1;
 	mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
 	mp->m_blockwmask = mp->m_blockwsize - 1;
 
-	/*
-	 * Setup for attributes, in case they get created.
-	 * This value is for inodes getting attributes for the first time,
-	 * the per-inode value is for old attribute values.
-	 */
-	ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048);
-	switch (sbp->sb_inodesize) {
-	case 256:
-		mp->m_attroffset = XFS_LITINO(mp) -
-				   XFS_BMDR_SPACE_CALC(MINABTPTRS);
-		break;
-	case 512:
-	case 1024:
-	case 2048:
-		mp->m_attroffset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
-		break;
-	default:
-		ASSERT(0);
-	}
-	ASSERT(mp->m_attroffset < XFS_LITINO(mp));
-
 	mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1);
 	mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0);
 	mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2;
@@ -645,7 +716,7 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
 	for (index = 0; index < agcount; index++) {
 		/*
 		 * read the agf, then the agi. This gets us
-		 * all the inforamtion we need and populates the
+		 * all the information we need and populates the
 		 * per-ag structures for us.
 		 */
 		error = xfs_alloc_pagf_init(mp, NULL, index, 0);
@@ -886,8 +957,6 @@ xfs_check_sizes(xfs_mount_t *mp)
 }
 
 /*
- * xfs_mountfs
- *
  * This function does the following on an initial mount of a file system:
  *	- reads the superblock from disk and init the mount struct
  *	- if we're a 32-bit kernel, do a size check on the superblock
@@ -905,7 +974,6 @@ xfs_mountfs(
 	xfs_inode_t	*rip;
 	__uint64_t	resblks;
 	uint		quotamount, quotaflags;
-	int		uuid_mounted = 0;
 	int		error = 0;
 
 	xfs_mount_common(mp, sbp);
@@ -960,7 +1028,7 @@ xfs_mountfs(
 	 */
 	error = xfs_update_alignment(mp);
 	if (error)
-		goto error1;
+		goto out;
 
 	xfs_alloc_compute_maxlevels(mp);
 	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
@@ -971,19 +1039,9 @@ xfs_mountfs(
 
 	mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog);
 
-	/*
-	 * XFS uses the uuid from the superblock as the unique
-	 * identifier for fsid.  We can not use the uuid from the volume
-	 * since a single partition filesystem is identical to a single
-	 * partition volume/filesystem.
-	 */
-	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
-		if (xfs_uuid_mount(mp)) {
-			error = XFS_ERROR(EINVAL);
-			goto error1;
-		}
-		uuid_mounted=1;
-	}
+	error = xfs_uuid_mount(mp);
+	if (error)
+		goto out;
 
 	/*
 	 * Set the minimum read and write sizes
@@ -1007,7 +1065,7 @@ xfs_mountfs(
 	 */
 	error = xfs_check_sizes(mp);
 	if (error)
-		goto error1;
+		goto out_remove_uuid;
 
 	/*
 	 * Initialize realtime fields in the mount structure
@@ -1015,7 +1073,7 @@ xfs_mountfs(
 	error = xfs_rtmount_init(mp);
 	if (error) {
 		cmn_err(CE_WARN, "XFS: RT mount failed");
-		goto error1;
+		goto out_remove_uuid;
 	}
 
 	/*
@@ -1045,26 +1103,26 @@ xfs_mountfs(
 	mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t),
 				  KM_MAYFAIL);
 	if (!mp->m_perag)
-		goto error1;
+		goto out_remove_uuid;
 
 	mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount);
 
+	if (!sbp->sb_logblocks) {
+		cmn_err(CE_WARN, "XFS: no log defined");
+		XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp);
+		error = XFS_ERROR(EFSCORRUPTED);
+		goto out_free_perag;
+	}
+
 	/*
 	 * log's mount-time initialization. Perform 1st part recovery if needed
 	 */
-	if (likely(sbp->sb_logblocks > 0)) {	/* check for volume case */
-		error = xfs_log_mount(mp, mp->m_logdev_targp,
-				      XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
-				      XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
-		if (error) {
-			cmn_err(CE_WARN, "XFS: log mount failed");
-			goto error2;
-		}
-	} else {	/* No log has been defined */
-		cmn_err(CE_WARN, "XFS: no log defined");
-		XFS_ERROR_REPORT("xfs_mountfs_int(1)", XFS_ERRLEVEL_LOW, mp);
-		error = XFS_ERROR(EFSCORRUPTED);
-		goto error2;
+	error = xfs_log_mount(mp, mp->m_logdev_targp,
+			      XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
+			      XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
+	if (error) {
+		cmn_err(CE_WARN, "XFS: log mount failed");
+		goto out_free_perag;
 	}
 
 	/*
@@ -1086,15 +1144,14 @@ xfs_mountfs(
 	 * If we are currently making the filesystem, the initialisation will
 	 * fail as the perag data is in an undefined state.
 	 */
-
 	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
 	    !XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
 	     !mp->m_sb.sb_inprogress) {
 		error = xfs_initialize_perag_data(mp, sbp->sb_agcount);
-		if (error) {
-			goto error2;
-		}
+		if (error)
+			goto out_free_perag;
 	}
+
 	/*
 	 * Get and sanity-check the root inode.
 	 * Save the pointer to it in the mount structure.
@@ -1102,7 +1159,7 @@ xfs_mountfs(
 	error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0);
 	if (error) {
 		cmn_err(CE_WARN, "XFS: failed to read root inode");
-		goto error3;
+		goto out_log_dealloc;
 	}
 
 	ASSERT(rip != NULL);
@@ -1116,7 +1173,7 @@ xfs_mountfs(
 		XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
 				 mp);
 		error = XFS_ERROR(EFSCORRUPTED);
-		goto error4;
+		goto out_rele_rip;
 	}
 	mp->m_rootip = rip;	/* save it */
 
@@ -1131,7 +1188,7 @@ xfs_mountfs(
 		 * Free up the root inode.
 		 */
 		cmn_err(CE_WARN, "XFS: failed to read RT inodes");
-		goto error4;
+		goto out_rele_rip;
 	}
 
 	/*
@@ -1143,7 +1200,7 @@ xfs_mountfs(
 		error = xfs_mount_log_sb(mp, mp->m_update_flags);
 		if (error) {
 			cmn_err(CE_WARN, "XFS: failed to write sb changes");
-			goto error4;
+			goto out_rtunmount;
 		}
 	}
 
@@ -1152,7 +1209,7 @@ xfs_mountfs(
 	 */
 	error = XFS_QM_INIT(mp, &quotamount, &quotaflags);
 	if (error)
-		goto error4;
+		goto out_rtunmount;
 
 	/*
 	 * Finish recovering the file system.  This part needed to be
@@ -1162,7 +1219,7 @@ xfs_mountfs(
 	error = xfs_log_mount_finish(mp);
 	if (error) {
 		cmn_err(CE_WARN, "XFS: log mount finish failed");
-		goto error4;
+		goto out_rtunmount;
 	}
 
 	/*
@@ -1170,7 +1227,7 @@ xfs_mountfs(
 	 */
 	error = XFS_QM_MOUNT(mp, quotamount, quotaflags);
 	if (error)
-		goto error4;
+		goto out_rtunmount;
 
 	/*
 	 * Now we are mounted, reserve a small amount of unused space for
@@ -1194,18 +1251,17 @@ xfs_mountfs(
 
 	return 0;
 
- error4:
-	/*
-	 * Free up the root inode.
-	 */
+ out_rtunmount:
+	xfs_rtunmount_inodes(mp);
+ out_rele_rip:
 	IRELE(rip);
- error3:
-	xfs_log_unmount_dealloc(mp);
- error2:
+ out_log_dealloc:
+	xfs_log_unmount(mp);
+ out_free_perag:
 	xfs_free_perag(mp);
- error1:
-	if (uuid_mounted)
-		uuid_table_remove(&mp->m_sb.sb_uuid);
+ out_remove_uuid:
+	xfs_uuid_unmount(mp);
+ out:
 	return error;
 }
 
@@ -1226,15 +1282,12 @@ xfs_unmountfs(
 	 */
 	XFS_QM_UNMOUNT(mp);
 
-	if (mp->m_rbmip)
-		IRELE(mp->m_rbmip);
-	if (mp->m_rsumip)
-		IRELE(mp->m_rsumip);
+	xfs_rtunmount_inodes(mp);
 	IRELE(mp->m_rootip);
 
 	/*
 	 * We can potentially deadlock here if we have an inode cluster
-	 * that has been freed has it's buffer still pinned in memory because
+	 * that has been freed and has its buffer still pinned in memory because
 	 * the transaction is still sitting in a iclog. The stale inodes
 	 * on that buffer will have their flush locks held until the
 	 * transaction hits the disk and the callbacks run. the inode
@@ -1266,7 +1319,7 @@ xfs_unmountfs(
 	 * Unreserve any blocks we have so that when we unmount we don't account
 	 * the reserved free space as used. This is really only necessary for
 	 * lazy superblock counting because it trusts the incore superblock
-	 * counters to be aboslutely correct on clean unmount.
+	 * counters to be absolutely correct on clean unmount.
 	 *
 	 * We don't bother correcting this elsewhere for lazy superblock
 	 * counting because on mount of an unclean filesystem we reconstruct the
@@ -1288,10 +1341,9 @@ xfs_unmountfs(
 				"Freespace may not be correct on next mount.");
 	xfs_unmountfs_writesb(mp);
 	xfs_unmountfs_wait(mp); 		/* wait for async bufs */
-	xfs_log_unmount(mp);			/* Done! No more fs ops. */
-
-	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
-		uuid_table_remove(&mp->m_sb.sb_uuid);
+	xfs_log_unmount_write(mp);
+	xfs_log_unmount(mp);
+	xfs_uuid_unmount(mp);
 
 #if defined(DEBUG)
 	xfs_errortag_clearall(mp, 0);
@@ -1793,29 +1845,6 @@ xfs_freesb(
 }
 
 /*
- * See if the UUID is unique among mounted XFS filesystems.
- * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
- */
-STATIC int
-xfs_uuid_mount(
-	xfs_mount_t	*mp)
-{
-	if (uuid_is_nil(&mp->m_sb.sb_uuid)) {
-		cmn_err(CE_WARN,
-			"XFS: Filesystem %s has nil UUID - can't mount",
-			mp->m_fsname);
-		return -1;
-	}
-	if (!uuid_table_insert(&mp->m_sb.sb_uuid)) {
-		cmn_err(CE_WARN,
-			"XFS: Filesystem %s has duplicate UUID - can't mount",
-			mp->m_fsname);
-		return -1;
-	}
-	return 0;
-}
-
-/*
  * Used to log changes to the superblock unit and width fields which could
  * be altered by the mount options, as well as any potential sb_features2
  * fixup. Only the first superblock is updated.
@@ -1868,7 +1897,7 @@ xfs_mount_log_sb(
  * we disable the per-cpu counter and go through the slow path.
  *
  * The slow path is the current xfs_mod_incore_sb() function.  This means that
- * when we disable a per-cpu counter, we need to drain it's resources back to
+ * when we disable a per-cpu counter, we need to drain its resources back to
  * the global superblock. We do this after disabling the counter to prevent
  * more threads from queueing up on the counter.
  *
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index f5e9937..7af44ad 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -136,7 +136,6 @@ typedef int	(*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *,
 			struct xfs_dquot *, struct xfs_dquot *, uint);
 typedef void	(*xfs_dqstatvfs_t)(struct xfs_inode *, struct kstatfs *);
 typedef int	(*xfs_dqsync_t)(struct xfs_mount *, int flags);
-typedef int	(*xfs_quotactl_t)(struct xfs_mount *, int, int, xfs_caddr_t);
 
 typedef struct xfs_qmops {
 	xfs_qminit_t		xfs_qminit;
@@ -154,7 +153,6 @@ typedef struct xfs_qmops {
 	xfs_dqvopchownresv_t	xfs_dqvopchownresv;
 	xfs_dqstatvfs_t		xfs_dqstatvfs;
 	xfs_dqsync_t		xfs_dqsync;
-	xfs_quotactl_t		xfs_quotactl;
 	struct xfs_dqtrxops	*xfs_dqtrxops;
 } xfs_qmops_t;
 
@@ -188,8 +186,6 @@ typedef struct xfs_qmops {
 	(*(ip)->i_mount->m_qm_ops->xfs_dqstatvfs)(ip, statp)
 #define XFS_QM_DQSYNC(mp, flags) \
 	(*(mp)->m_qm_ops->xfs_dqsync)(mp, flags)
-#define XFS_QM_QUOTACTL(mp, cmd, id, addr) \
-	(*(mp)->m_qm_ops->xfs_quotactl)(mp, cmd, id, addr)
 
 #ifdef HAVE_PERCPU_SB
 
@@ -273,19 +269,17 @@ typedef struct xfs_mount {
 	uint			m_inobt_mnr[2];	/* min inobt btree records */
 	uint			m_ag_maxlevels;	/* XFS_AG_MAXLEVELS */
 	uint			m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
-	uint			m_in_maxlevels;	/* XFS_IN_MAXLEVELS */
+	uint			m_in_maxlevels;	/* max inobt btree levels. */
 	struct xfs_perag	*m_perag;	/* per-ag accounting info */
 	struct rw_semaphore	m_peraglock;	/* lock for m_perag (pointer) */
 	struct mutex		m_growlock;	/* growfs mutex */
 	int			m_fixedfsid[2];	/* unchanged for life of FS */
 	uint			m_dmevmask;	/* DMI events for this FS */
 	__uint64_t		m_flags;	/* global mount flags */
-	uint			m_attroffset;	/* inode attribute offset */
 	uint			m_dir_node_ents; /* #entries in a dir danode */
 	uint			m_attr_node_ents; /* #entries in attr danode */
 	int			m_ialloc_inos;	/* inodes in inode allocation */
 	int			m_ialloc_blks;	/* blocks in inode allocation */
-	int			m_litino;	/* size of inode union area */
 	int			m_inoalign_mask;/* mask sb_inoalignmt if used */
 	uint			m_qflags;	/* quota status flags */
 	xfs_trans_reservations_t m_reservations;/* precomputed res values */
@@ -293,9 +287,6 @@ typedef struct xfs_mount {
 	__uint64_t		m_maxioffset;	/* maximum inode offset */
 	__uint64_t		m_resblks;	/* total reserved blocks */
 	__uint64_t		m_resblks_avail;/* available reserved blocks */
-#if XFS_BIG_INUMS
-	xfs_ino_t		m_inoadd;	/* add value for ino64_offset */
-#endif
 	int			m_dalign;	/* stripe unit */
 	int			m_swidth;	/* stripe width */
 	int			m_sinoalign;	/* stripe unit inode alignment */
@@ -337,7 +328,6 @@ typedef struct xfs_mount {
 #define XFS_MOUNT_WSYNC		(1ULL << 0)	/* for nfs - all metadata ops
 						   must be synchronous except
 						   for space allocations */
-#define XFS_MOUNT_INO64		(1ULL << 1)
 #define XFS_MOUNT_DMAPI		(1ULL << 2)	/* dmapi is enabled */
 #define XFS_MOUNT_WAS_CLEAN	(1ULL << 3)
 #define XFS_MOUNT_FS_SHUTDOWN	(1ULL << 4)	/* atomic stop of all filesystem
@@ -389,8 +379,8 @@ typedef struct xfs_mount {
  * Synchronous read and write sizes.  This should be
  * better for NFSv2 wsync filesystems.
  */
-#define	XFS_WSYNC_READIO_LOG	15	/* 32K */
-#define	XFS_WSYNC_WRITEIO_LOG	14	/* 16K */
+#define	XFS_WSYNC_READIO_LOG	15	/* 32k */
+#define	XFS_WSYNC_WRITEIO_LOG	14	/* 16k */
 
 /*
  * Allow large block sizes to be reported to userspace programs if the
@@ -500,9 +490,6 @@ typedef struct xfs_mod_sb {
 	int64_t		msb_delta;	/* Change to make to specified field */
 } xfs_mod_sb_t;
 
-#define	XFS_MOUNT_ILOCK(mp)	mutex_lock(&((mp)->m_ilock))
-#define	XFS_MOUNT_IUNLOCK(mp)	mutex_unlock(&((mp)->m_ilock))
-
 extern int	xfs_log_sbcount(xfs_mount_t *, uint);
 extern int	xfs_mountfs(xfs_mount_t *mp);
 extern void	xfs_mountfs_check_barriers(xfs_mount_t *mp);
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
index 27f8058..e101790 100644
--- a/fs/xfs/xfs_qmops.c
+++ b/fs/xfs/xfs_qmops.c
@@ -126,7 +126,6 @@ static struct xfs_qmops xfs_qmcore_stub = {
 	.xfs_dqvopchownresv	= (xfs_dqvopchownresv_t) fs_noerr,
 	.xfs_dqstatvfs		= (xfs_dqstatvfs_t) fs_noval,
 	.xfs_dqsync		= (xfs_dqsync_t) fs_noerr,
-	.xfs_quotactl		= (xfs_quotactl_t) fs_nosys,
 };
 
 int
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 48965ec..f5d1202 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -18,6 +18,8 @@
 #ifndef __XFS_QUOTA_H__
 #define __XFS_QUOTA_H__
 
+struct xfs_trans;
+
 /*
  * The ondisk form of a dquot structure.
  */
@@ -185,7 +187,6 @@ typedef struct xfs_qoff_logformat {
  * to a single function. None of these XFS_QMOPT_* flags are meant to have
  * persistent values (ie. their values can and will change between versions)
  */
-#define XFS_QMOPT_DQLOCK	0x0000001 /* dqlock */
 #define XFS_QMOPT_DQALLOC	0x0000002 /* alloc dquot ondisk if needed */
 #define XFS_QMOPT_UQUOTA	0x0000004 /* user dquot requested */
 #define XFS_QMOPT_PQUOTA	0x0000008 /* project dquot requested */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index c5bb86f..385f6dc 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -2288,6 +2288,16 @@ xfs_rtmount_inodes(
 	return 0;
 }
 
+void
+xfs_rtunmount_inodes(
+	struct xfs_mount	*mp)
+{
+	if (mp->m_rbmip)
+		IRELE(mp->m_rbmip);
+	if (mp->m_rsumip)
+		IRELE(mp->m_rsumip);
+}
+
 /*
  * Pick an extent for allocation at the start of a new realtime file.
  * Use the sequence number stored in the atime field of the bitmap inode.
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 8d8dcd2..b2d67ad 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -23,8 +23,8 @@ struct xfs_trans;
 
 /* Min and max rt extent sizes, specified in bytes */
 #define	XFS_MAX_RTEXTSIZE	(1024 * 1024 * 1024)	/* 1GB */
-#define	XFS_DFL_RTEXTSIZE	(64 * 1024)	        /* 64KB */
-#define	XFS_MIN_RTEXTSIZE	(4 * 1024)		/* 4KB */
+#define	XFS_DFL_RTEXTSIZE	(64 * 1024)	        /* 64kB */
+#define	XFS_MIN_RTEXTSIZE	(4 * 1024)		/* 4kB */
 
 /*
  * Constants for bit manipulations.
@@ -108,6 +108,9 @@ xfs_rtfree_extent(
 int					/* error */
 xfs_rtmount_init(
 	struct xfs_mount	*mp);	/* file system mount structure */
+void
+xfs_rtunmount_inodes(
+	struct xfs_mount	*mp);
 
 /*
  * Get the bitmap and summary inodes into the mount structure
@@ -146,6 +149,7 @@ xfs_growfs_rt(
 # define xfs_growfs_rt(mp,in)                           (ENOSYS)
 # define xfs_rtmount_init(m)    (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
 # define xfs_rtmount_inodes(m)  (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
+# define xfs_rtunmount_inodes(m)
 #endif	/* CONFIG_XFS_RT */
 
 #endif	/* __KERNEL__ */
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index d6fe4a8..775249a 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -292,7 +292,7 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
  * In a write transaction we can allocate a maximum of 2
  * extents.  This gives:
  *    the inode getting the new extents: inode size
- *    the inode\'s bmap btree: max depth * block size
+ *    the inode's bmap btree: max depth * block size
  *    the agfs of the ags from which the extents are allocated: 2 * sector
  *    the superblock free block counter: sector size
  *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
@@ -321,7 +321,7 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
 /*
  * In truncating a file we free up to two extents at once.  We can modify:
  *    the inode being truncated: inode size
- *    the inode\'s bmap btree: (max depth + 1) * block size
+ *    the inode's bmap btree: (max depth + 1) * block size
  * And the bmap_finish transaction can free the blocks and bmap blocks:
  *    the agf for each of the ags: 4 * sector size
  *    the agfl for each of the ags: 4 * sector size
@@ -343,7 +343,7 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
 	  (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))) + \
 	  (128 * 5) + \
 	  XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-	   (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \
+	   (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
 	    XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
 
 #define	XFS_ITRUNCATE_LOG_RES(mp)   ((mp)->m_reservations.tr_itruncate)
@@ -431,8 +431,8 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
  *    the new inode: inode size
  *    the inode btree entry: 1 block
  *    the directory btree: (max depth + v2) * dir block size
- *    the directory inode\'s bmap btree: (max depth + v2) * block size
- *    the blocks for the symlink: 1 KB
+ *    the directory inode's bmap btree: (max depth + v2) * block size
+ *    the blocks for the symlink: 1 kB
  * Or in the first xact we allocate some inodes giving:
  *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
  *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
@@ -449,9 +449,9 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
 	  (128 * (4 + XFS_DIROP_LOG_COUNT(mp)))), \
 	 (2 * (mp)->m_sb.sb_sectsize + \
 	  XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
-	  XFS_FSB_TO_B((mp), XFS_IN_MAXLEVELS(mp)) + \
+	  XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
 	  XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-	  (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \
+	  (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
 	   XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
 
 #define	XFS_SYMLINK_LOG_RES(mp)	((mp)->m_reservations.tr_symlink)
@@ -463,7 +463,7 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
  *    the inode btree entry: block size
  *    the superblock for the nlink flag: sector size
  *    the directory btree: (max depth + v2) * dir block size
- *    the directory inode\'s bmap btree: (max depth + v2) * block size
+ *    the directory inode's bmap btree: (max depth + v2) * block size
  * Or in the first xact we allocate some inodes giving:
  *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
  *    the superblock for the nlink flag: sector size
@@ -481,9 +481,9 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
 	  (128 * (3 + XFS_DIROP_LOG_COUNT(mp)))), \
 	 (3 * (mp)->m_sb.sb_sectsize + \
 	  XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
-	  XFS_FSB_TO_B((mp), XFS_IN_MAXLEVELS(mp)) + \
+	  XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
 	  XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-	  (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \
+	  (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
 	   XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
 
 #define	XFS_CREATE_LOG_RES(mp)	((mp)->m_reservations.tr_create)
@@ -513,7 +513,7 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
 	 MAX((__uint16_t)XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \
 	 (128 * 5) + \
 	  XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-	  (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \
+	  (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
 	   XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
 
 
@@ -637,7 +637,7 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
 /*
  * Removing the attribute fork of a file
  *    the inode being truncated: inode size
- *    the inode\'s bmap btree: max depth * block size
+ *    the inode's bmap btree: max depth * block size
  * And the bmap_finish transaction can free the blocks and bmap blocks:
  *    the agf for each of the ags: 4 * sector size
  *    the agfl for each of the ags: 4 * sector size
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 2d47f10..f31271c 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -79,7 +79,7 @@ xfs_trans_ail_tail(
  * the push is run asynchronously in a separate thread, so we return the tail
  * of the log right now instead of the tail after the push. This means we will
  * either continue right away, or we will sleep waiting on the async thread to
- * do it's work.
+ * do its work.
  *
  * We do this unlocked - we only need to know whether there is anything in the
  * AIL at the time we are called. We don't need to access the contents of
@@ -160,7 +160,7 @@ xfs_trans_ail_cursor_next(
 /*
  * Now that the traversal is complete, we need to remove the cursor
  * from the list of traversing cursors. Avoid removing the embedded
- * push cursor, but use the fact it is alway present to make the
+ * push cursor, but use the fact it is always present to make the
  * list deletion simple.
  */
 void
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index e110bf5..eb3fc57 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -22,7 +22,7 @@
 #include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
-/* XXX: from here down needed until struct xfs_trans has it's own ailp */
+/* XXX: from here down needed until struct xfs_trans has its own ailp */
 #include "xfs_bit.h"
 #include "xfs_buf_item.h"
 #include "xfs_sb.h"
diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/xfs_trans_space.h
index 4ea2e50..7d2c920 100644
--- a/fs/xfs/xfs_trans_space.h
+++ b/fs/xfs/xfs_trans_space.h
@@ -47,7 +47,7 @@
 #define	XFS_DIRREMOVE_SPACE_RES(mp)	\
 	XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK)
 #define	XFS_IALLOC_SPACE_RES(mp)	\
-	(XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp)-1)
+	(XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels - 1)
 
 /*
  * Space reservation values for various transactions.
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index b2f7245..d725428 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -21,14 +21,6 @@
 #ifdef __KERNEL__
 
 /*
- * POSIX Extensions
- */
-typedef unsigned char		uchar_t;
-typedef unsigned short		ushort_t;
-typedef unsigned int		uint_t;
-typedef unsigned long		ulong_t;
-
-/*
  * Additional type declarations for XFS
  */
 typedef signed char		__int8_t;
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index fcc2285..79b9e5e 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -374,7 +374,7 @@ xfs_truncate_file(
 
 	/*
 	 * Follow the normal truncate locking protocol.  Since we
-	 * hold the inode in the transaction, we know that it's number
+	 * hold the inode in the transaction, we know that its number
 	 * of references will stay constant.
 	 */
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 0e55c5d..7394c7a 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1136,7 +1136,7 @@ xfs_inactive(
 	 * If the inode is already free, then there can be nothing
 	 * to clean up here.
 	 */
-	if (ip->i_d.di_mode == 0 || VN_BAD(VFS_I(ip))) {
+	if (ip->i_d.di_mode == 0 || is_bad_inode(VFS_I(ip))) {
 		ASSERT(ip->i_df.if_real_bytes == 0);
 		ASSERT(ip->i_df.if_broot_bytes == 0);
 		return VN_INACTIVE_CACHE;
@@ -1387,23 +1387,28 @@ xfs_create(
 	xfs_inode_t		**ipp,
 	cred_t			*credp)
 {
-	xfs_mount_t		*mp = dp->i_mount;
-	xfs_inode_t		*ip;
-	xfs_trans_t		*tp;
+	int			is_dir = S_ISDIR(mode);
+	struct xfs_mount	*mp = dp->i_mount;
+	struct xfs_inode	*ip = NULL;
+	struct xfs_trans	*tp = NULL;
 	int			error;
 	xfs_bmap_free_t		free_list;
 	xfs_fsblock_t		first_block;
 	boolean_t		unlock_dp_on_error = B_FALSE;
-	int			dm_event_sent = 0;
 	uint			cancel_flags;
 	int			committed;
 	xfs_prid_t		prid;
-	struct xfs_dquot	*udqp, *gdqp;
+	struct xfs_dquot	*udqp = NULL;
+	struct xfs_dquot	*gdqp = NULL;
 	uint			resblks;
+	uint			log_res;
+	uint			log_count;
 
-	ASSERT(!*ipp);
 	xfs_itrace_entry(dp);
 
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
 	if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) {
 		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
 				dp, DM_RIGHT_NULL, NULL,
@@ -1412,84 +1417,97 @@ xfs_create(
 
 		if (error)
 			return error;
-		dm_event_sent = 1;
 	}
 
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return XFS_ERROR(EIO);
-
-	/* Return through std_return after this point. */
-
-	udqp = gdqp = NULL;
 	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
 		prid = dp->i_d.di_projid;
 	else
-		prid = (xfs_prid_t)dfltprid;
+		prid = dfltprid;
 
 	/*
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
 	error = XFS_QM_DQVOPALLOC(mp, dp,
 			current_fsuid(), current_fsgid(), prid,
-			XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp);
+			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
 	if (error)
 		goto std_return;
 
-	ip = NULL;
+	if (is_dir) {
+		rdev = 0;
+		resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
+		log_res = XFS_MKDIR_LOG_RES(mp);
+		log_count = XFS_MKDIR_LOG_COUNT;
+		tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
+	} else {
+		resblks = XFS_CREATE_SPACE_RES(mp, name->len);
+		log_res = XFS_CREATE_LOG_RES(mp);
+		log_count = XFS_CREATE_LOG_COUNT;
+		tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
+	}
 
-	tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
 	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
-	resblks = XFS_CREATE_SPACE_RES(mp, name->len);
+
 	/*
 	 * Initially assume that the file does not exist and
 	 * reserve the resources for that case.  If that is not
 	 * the case we'll drop the one we have and get a more
 	 * appropriate transaction later.
 	 */
-	error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0,
-			XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
+	error = xfs_trans_reserve(tp, resblks, log_res, 0,
+			XFS_TRANS_PERM_LOG_RES, log_count);
 	if (error == ENOSPC) {
 		resblks = 0;
-		error = xfs_trans_reserve(tp, 0, XFS_CREATE_LOG_RES(mp), 0,
-				XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
+		error = xfs_trans_reserve(tp, 0, log_res, 0,
+				XFS_TRANS_PERM_LOG_RES, log_count);
 	}
 	if (error) {
 		cancel_flags = 0;
-		goto error_return;
+		goto out_trans_cancel;
 	}
 
 	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
 	unlock_dp_on_error = B_TRUE;
 
-	xfs_bmap_init(&free_list, &first_block);
+	/*
+	 * Check for directory link count overflow.
+	 */
+	if (is_dir && dp->i_d.di_nlink >= XFS_MAXLINK) {
+		error = XFS_ERROR(EMLINK);
+		goto out_trans_cancel;
+	}
 
-	ASSERT(ip == NULL);
+	xfs_bmap_init(&free_list, &first_block);
 
 	/*
 	 * Reserve disk quota and the inode.
 	 */
 	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
 	if (error)
-		goto error_return;
+		goto out_trans_cancel;
 
 	error = xfs_dir_canenter(tp, dp, name, resblks);
 	if (error)
-		goto error_return;
-	error = xfs_dir_ialloc(&tp, dp, mode, 1,
-			rdev, credp, prid, resblks > 0,
-			&ip, &committed);
+		goto out_trans_cancel;
+
+	/*
+	 * A newly created regular or special file just has one directory
+	 * entry pointing to it, but a directory also has the "." entry
+	 * pointing to itself.
+	 */
+	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, credp,
+			       prid, resblks > 0, &ip, &committed);
 	if (error) {
 		if (error == ENOSPC)
-			goto error_return;
-		goto abort_return;
+			goto out_trans_cancel;
+		goto out_trans_abort;
 	}
-	xfs_itrace_ref(ip);
 
 	/*
 	 * At this point, we've gotten a newly allocated inode.
 	 * It is locked (and joined to the transaction).
 	 */
-
+	xfs_itrace_ref(ip);
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
 	/*
@@ -1508,19 +1526,28 @@ xfs_create(
 					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
 	if (error) {
 		ASSERT(error != ENOSPC);
-		goto abort_return;
+		goto out_trans_abort;
 	}
 	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
 
+	if (is_dir) {
+		error = xfs_dir_init(tp, ip, dp);
+		if (error)
+			goto out_bmap_cancel;
+
+		error = xfs_bumplink(tp, dp);
+		if (error)
+			goto out_bmap_cancel;
+	}
+
 	/*
 	 * If this is a synchronous mount, make sure that the
 	 * create transaction goes to disk before returning to
 	 * the user.
 	 */
-	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
+	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
 		xfs_trans_set_sync(tp);
-	}
 
 	/*
 	 * Attach the dquot(s) to the inodes and modify them incore.
@@ -1537,16 +1564,13 @@ xfs_create(
 	IHOLD(ip);
 
 	error = xfs_bmap_finish(&tp, &free_list, &committed);
-	if (error) {
-		xfs_bmap_cancel(&free_list);
-		goto abort_rele;
-	}
+	if (error)
+		goto out_abort_rele;
 
 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
 	if (error) {
 		IRELE(ip);
-		tp = NULL;
-		goto error_return;
+		goto out_dqrele;
 	}
 
 	XFS_QM_DQRELE(mp, udqp);
@@ -1555,26 +1579,22 @@ xfs_create(
 	*ipp = ip;
 
 	/* Fallthrough to std_return with error = 0  */
-
-std_return:
-	if ((*ipp || (error != 0 && dm_event_sent != 0)) &&
-	    DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
-		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
-			dp, DM_RIGHT_NULL,
-			*ipp ? ip : NULL,
-			DM_RIGHT_NULL, name->name, NULL,
-			mode, error, 0);
+ std_return:
+	if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
+		XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, dp, DM_RIGHT_NULL,
+				ip, DM_RIGHT_NULL, name->name, NULL, mode,
+				error, 0);
 	}
+
 	return error;
 
- abort_return:
+ out_bmap_cancel:
+	xfs_bmap_cancel(&free_list);
+ out_trans_abort:
 	cancel_flags |= XFS_TRANS_ABORT;
-	/* FALLTHROUGH */
-
- error_return:
-	if (tp != NULL)
-		xfs_trans_cancel(tp, cancel_flags);
-
+ out_trans_cancel:
+	xfs_trans_cancel(tp, cancel_flags);
+ out_dqrele:
 	XFS_QM_DQRELE(mp, udqp);
 	XFS_QM_DQRELE(mp, gdqp);
 
@@ -1583,20 +1603,18 @@ std_return:
 
 	goto std_return;
 
- abort_rele:
+ out_abort_rele:
 	/*
 	 * Wait until after the current transaction is aborted to
 	 * release the inode.  This prevents recursive transactions
 	 * and deadlocks from xfs_inactive.
 	 */
+	xfs_bmap_cancel(&free_list);
 	cancel_flags |= XFS_TRANS_ABORT;
 	xfs_trans_cancel(tp, cancel_flags);
 	IRELE(ip);
-
-	XFS_QM_DQRELE(mp, udqp);
-	XFS_QM_DQRELE(mp, gdqp);
-
-	goto std_return;
+	unlock_dp_on_error = B_FALSE;
+	goto out_dqrele;
 }
 
 #ifdef DEBUG
@@ -2004,8 +2022,10 @@ xfs_link(
 	/* Return through std_return after this point. */
 
 	error = XFS_QM_DQATTACH(mp, sip, 0);
-	if (!error && sip != tdp)
-		error = XFS_QM_DQATTACH(mp, tdp, 0);
+	if (error)
+		goto std_return;
+
+	error = XFS_QM_DQATTACH(mp, tdp, 0);
 	if (error)
 		goto std_return;
 
@@ -2110,209 +2130,6 @@ std_return:
 	goto std_return;
 }
 
-
-int
-xfs_mkdir(
-	xfs_inode_t             *dp,
-	struct xfs_name		*dir_name,
-	mode_t			mode,
-	xfs_inode_t		**ipp,
-	cred_t			*credp)
-{
-	xfs_mount_t		*mp = dp->i_mount;
-	xfs_inode_t		*cdp;	/* inode of created dir */
-	xfs_trans_t		*tp;
-	int			cancel_flags;
-	int			error;
-	int			committed;
-	xfs_bmap_free_t         free_list;
-	xfs_fsblock_t           first_block;
-	boolean_t		unlock_dp_on_error = B_FALSE;
-	boolean_t		created = B_FALSE;
-	int			dm_event_sent = 0;
-	xfs_prid_t		prid;
-	struct xfs_dquot	*udqp, *gdqp;
-	uint			resblks;
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return XFS_ERROR(EIO);
-
-	tp = NULL;
-
-	if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) {
-		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
-					dp, DM_RIGHT_NULL, NULL,
-					DM_RIGHT_NULL, dir_name->name, NULL,
-					mode, 0, 0);
-		if (error)
-			return error;
-		dm_event_sent = 1;
-	}
-
-	/* Return through std_return after this point. */
-
-	xfs_itrace_entry(dp);
-
-	mp = dp->i_mount;
-	udqp = gdqp = NULL;
-	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
-		prid = dp->i_d.di_projid;
-	else
-		prid = (xfs_prid_t)dfltprid;
-
-	/*
-	 * Make sure that we have allocated dquot(s) on disk.
-	 */
-	error = XFS_QM_DQVOPALLOC(mp, dp,
-			current_fsuid(), current_fsgid(), prid,
-			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
-	if (error)
-		goto std_return;
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
-	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
-	resblks = XFS_MKDIR_SPACE_RES(mp, dir_name->len);
-	error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0,
-				  XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT);
-	if (error == ENOSPC) {
-		resblks = 0;
-		error = xfs_trans_reserve(tp, 0, XFS_MKDIR_LOG_RES(mp), 0,
-					  XFS_TRANS_PERM_LOG_RES,
-					  XFS_MKDIR_LOG_COUNT);
-	}
-	if (error) {
-		cancel_flags = 0;
-		goto error_return;
-	}
-
-	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
-	unlock_dp_on_error = B_TRUE;
-
-	/*
-	 * Check for directory link count overflow.
-	 */
-	if (dp->i_d.di_nlink >= XFS_MAXLINK) {
-		error = XFS_ERROR(EMLINK);
-		goto error_return;
-	}
-
-	/*
-	 * Reserve disk quota and the inode.
-	 */
-	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
-	if (error)
-		goto error_return;
-
-	error = xfs_dir_canenter(tp, dp, dir_name, resblks);
-	if (error)
-		goto error_return;
-	/*
-	 * create the directory inode.
-	 */
-	error = xfs_dir_ialloc(&tp, dp, mode, 2,
-			0, credp, prid, resblks > 0,
-		&cdp, NULL);
-	if (error) {
-		if (error == ENOSPC)
-			goto error_return;
-		goto abort_return;
-	}
-	xfs_itrace_ref(cdp);
-
-	/*
-	 * Now we add the directory inode to the transaction.
-	 * We waited until now since xfs_dir_ialloc might start
-	 * a new transaction.  Had we joined the transaction
-	 * earlier, the locks might have gotten released. An error
-	 * from here on will result in the transaction cancel
-	 * unlocking dp so don't do it explicitly in the error path.
-	 */
-	IHOLD(dp);
-	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
-	unlock_dp_on_error = B_FALSE;
-
-	xfs_bmap_init(&free_list, &first_block);
-
-	error = xfs_dir_createname(tp, dp, dir_name, cdp->i_ino,
-					&first_block, &free_list, resblks ?
-					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
-	if (error) {
-		ASSERT(error != ENOSPC);
-		goto error1;
-	}
-	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-
-	error = xfs_dir_init(tp, cdp, dp);
-	if (error)
-		goto error2;
-
-	error = xfs_bumplink(tp, dp);
-	if (error)
-		goto error2;
-
-	created = B_TRUE;
-
-	*ipp = cdp;
-	IHOLD(cdp);
-
-	/*
-	 * Attach the dquots to the new inode and modify the icount incore.
-	 */
-	XFS_QM_DQVOPCREATE(mp, tp, cdp, udqp, gdqp);
-
-	/*
-	 * If this is a synchronous mount, make sure that the
-	 * mkdir transaction goes to disk before returning to
-	 * the user.
-	 */
-	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
-		xfs_trans_set_sync(tp);
-	}
-
-	error = xfs_bmap_finish(&tp, &free_list, &committed);
-	if (error) {
-		IRELE(cdp);
-		goto error2;
-	}
-
-	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-	XFS_QM_DQRELE(mp, udqp);
-	XFS_QM_DQRELE(mp, gdqp);
-	if (error) {
-		IRELE(cdp);
-	}
-
-	/* Fall through to std_return with error = 0 or errno from
-	 * xfs_trans_commit. */
-
-std_return:
-	if ((created || (error != 0 && dm_event_sent != 0)) &&
-	    DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
-		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
-					dp, DM_RIGHT_NULL,
-					created ? cdp : NULL,
-					DM_RIGHT_NULL,
-					dir_name->name, NULL,
-					mode, error, 0);
-	}
-	return error;
-
- error2:
- error1:
-	xfs_bmap_cancel(&free_list);
- abort_return:
-	cancel_flags |= XFS_TRANS_ABORT;
- error_return:
-	xfs_trans_cancel(tp, cancel_flags);
-	XFS_QM_DQRELE(mp, udqp);
-	XFS_QM_DQRELE(mp, gdqp);
-
-	if (unlock_dp_on_error)
-		xfs_iunlock(dp, XFS_ILOCK_EXCL);
-
-	goto std_return;
-}
-
 int
 xfs_symlink(
 	xfs_inode_t		*dp,
@@ -2587,51 +2404,6 @@ std_return:
 }
 
 int
-xfs_inode_flush(
-	xfs_inode_t	*ip,
-	int		flags)
-{
-	xfs_mount_t	*mp = ip->i_mount;
-	int		error = 0;
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return XFS_ERROR(EIO);
-
-	/*
-	 * Bypass inodes which have already been cleaned by
-	 * the inode flush clustering code inside xfs_iflush
-	 */
-	if (xfs_inode_clean(ip))
-		return 0;
-
-	/*
-	 * We make this non-blocking if the inode is contended,
-	 * return EAGAIN to indicate to the caller that they
-	 * did not succeed. This prevents the flush path from
-	 * blocking on inodes inside another operation right
-	 * now, they get caught later by xfs_sync.
-	 */
-	if (flags & FLUSH_SYNC) {
-		xfs_ilock(ip, XFS_ILOCK_SHARED);
-		xfs_iflock(ip);
-	} else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
-		if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) {
-			xfs_iunlock(ip, XFS_ILOCK_SHARED);
-			return EAGAIN;
-		}
-	} else {
-		return EAGAIN;
-	}
-
-	error = xfs_iflush(ip, (flags & FLUSH_SYNC) ? XFS_IFLUSH_SYNC
-						    : XFS_IFLUSH_ASYNC_NOBLOCK);
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-	return error;
-}
-
-
-int
 xfs_set_dmattrs(
 	xfs_inode_t     *ip,
 	u_int		evmask,
@@ -2676,7 +2448,7 @@ xfs_reclaim(
 	ASSERT(!VN_MAPPED(VFS_I(ip)));
 
 	/* bad inode, get out here ASAP */
-	if (VN_BAD(VFS_I(ip))) {
+	if (is_bad_inode(VFS_I(ip))) {
 		xfs_ireclaim(ip);
 		return 0;
 	}
@@ -3090,7 +2862,7 @@ xfs_free_file_space(
 
 	/*
 	 * Need to zero the stuff we're not freeing, on disk.
-	 * If its a realtime file & can't use unwritten extents then we
+	 * If it's a realtime file & can't use unwritten extents then we
 	 * actually need to zero the extent edges.  Otherwise xfs_bunmapi
 	 * will take care of it for us.
 	 */
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 76df328..04373c6 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -31,14 +31,11 @@ int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
 		struct xfs_inode *ip);
 int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
 		struct xfs_name *target_name);
-int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name,
-		mode_t mode, struct xfs_inode **ipp, cred_t *credp);
 int xfs_readdir(struct xfs_inode	*dp, void *dirent, size_t bufsize,
 		       xfs_off_t *offset, filldir_t filldir);
 int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
 		const char *target_path, mode_t mode, struct xfs_inode **ipp,
 		cred_t *credp);
-int xfs_inode_flush(struct xfs_inode *ip, int flags);
 int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
 int xfs_reclaim(struct xfs_inode *ip);
 int xfs_change_file_space(struct xfs_inode *ip, int cmd,
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 8209e08..66ec05a 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -139,6 +139,9 @@ struct target_type {
 	dm_ioctl_fn ioctl;
 	dm_merge_fn merge;
 	dm_busy_fn busy;
+
+	/* For internal device-mapper use. */
+	struct list_head list;
 };
 
 struct io_restrictions {
diff --git a/include/linux/dm-dirty-log.h b/include/linux/dm-dirty-log.h
index 600c5fb..5e8b11d 100644
--- a/include/linux/dm-dirty-log.h
+++ b/include/linux/dm-dirty-log.h
@@ -28,6 +28,9 @@ struct dm_dirty_log_type {
 	const char *name;
 	struct module *module;
 
+	/* For internal device-mapper use */
+	struct list_head list;
+
 	int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti,
 		   unsigned argc, char **argv);
 	void (*dtr)(struct dm_dirty_log *log);
@@ -113,6 +116,16 @@ struct dm_dirty_log_type {
 	 */
 	int (*status)(struct dm_dirty_log *log, status_type_t status_type,
 		      char *result, unsigned maxlen);
+
+	/*
+	 * is_remote_recovering is necessary for cluster mirroring. It provides
+	 * a way to detect recovery on another node, so we aren't writing
+	 * concurrently.  This function is likely to block (when a cluster log
+	 * is used).
+	 *
+	 * Returns: 0, 1
+	 */
+	int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region);
 };
 
 int dm_dirty_log_type_register(struct dm_dirty_log_type *type);
diff --git a/include/linux/hdreg.h b/include/linux/hdreg.h
index ed21bd3..29ee287 100644
--- a/include/linux/hdreg.h
+++ b/include/linux/hdreg.h
@@ -1,68 +1,6 @@
 #ifndef _LINUX_HDREG_H
 #define _LINUX_HDREG_H
 
-#ifdef __KERNEL__
-#include <linux/ata.h>
-
-/*
- * This file contains some defines for the AT-hd-controller.
- * Various sources.
- */
-
-/* ide.c has its own port definitions in "ide.h" */
-
-#define HD_IRQ		14
-
-/* Hd controller regs. Ref: IBM AT Bios-listing */
-#define HD_DATA		0x1f0		/* _CTL when writing */
-#define HD_ERROR	0x1f1		/* see err-bits */
-#define HD_NSECTOR	0x1f2		/* nr of sectors to read/write */
-#define HD_SECTOR	0x1f3		/* starting sector */
-#define HD_LCYL		0x1f4		/* starting cylinder */
-#define HD_HCYL		0x1f5		/* high byte of starting cyl */
-#define HD_CURRENT	0x1f6		/* 101dhhhh , d=drive, hhhh=head */
-#define HD_STATUS	0x1f7		/* see status-bits */
-#define HD_FEATURE	HD_ERROR	/* same io address, read=error, write=feature */
-#define HD_PRECOMP	HD_FEATURE	/* obsolete use of this port - predates IDE */
-#define HD_COMMAND	HD_STATUS	/* same io address, read=status, write=cmd */
-
-#define HD_CMD		0x3f6		/* used for resets */
-#define HD_ALTSTATUS	0x3f6		/* same as HD_STATUS but doesn't clear irq */
-
-/* remainder is shared between hd.c, ide.c, ide-cd.c, and the hdparm utility */
-
-/* Bits of HD_STATUS */
-#define ERR_STAT		0x01
-#define INDEX_STAT		0x02
-#define ECC_STAT		0x04	/* Corrected error */
-#define DRQ_STAT		0x08
-#define SEEK_STAT		0x10
-#define SRV_STAT		0x10
-#define WRERR_STAT		0x20
-#define READY_STAT		0x40
-#define BUSY_STAT		0x80
-
-/* Bits for HD_ERROR */
-#define MARK_ERR		0x01	/* Bad address mark */
-#define ILI_ERR			0x01	/* Illegal Length Indication (ATAPI) */
-#define TRK0_ERR		0x02	/* couldn't find track 0 */
-#define EOM_ERR			0x02	/* End Of Media (ATAPI) */
-#define ABRT_ERR		0x04	/* Command aborted */
-#define MCR_ERR			0x08	/* media change request */
-#define ID_ERR			0x10	/* ID field not found */
-#define MC_ERR			0x20	/* media changed */
-#define ECC_ERR			0x40	/* Uncorrectable ECC error */
-#define BBD_ERR			0x80	/* pre-EIDE meaning:  block marked bad */
-#define ICRC_ERR		0x80	/* new meaning:  CRC error during transfer */
-#define LFS_ERR			0xf0	/* Last Failed Sense (ATAPI) */
-
-/* Bits of HD_NSECTOR */
-#define CD			0x01
-#define IO			0x02
-#define REL			0x04
-#define TAG_MASK		0xf8
-#endif /* __KERNEL__ */
-
 #include <linux/types.h>
 
 /*
@@ -191,6 +129,7 @@ typedef struct hd_drive_hob_hdr {
 #define TASKFILE_INVALID		0x7fff
 #endif
 
+#ifndef __KERNEL__
 /* ATA/ATAPI Commands pre T13 Spec */
 #define WIN_NOP				0x00
 /*
@@ -379,6 +318,7 @@ typedef struct hd_drive_hob_hdr {
 #define SECURITY_ERASE_UNIT		0xBD
 #define SECURITY_FREEZE_LOCK		0xBE
 #define SECURITY_DISABLE_PASSWORD	0xBF
+#endif /* __KERNEL__ */
 
 struct hd_geometry {
       unsigned char heads;
@@ -448,6 +388,7 @@ enum {
 
 #define __NEW_HD_DRIVE_ID
 
+#ifndef __KERNEL__
 /*
  * Structure returned by HDIO_GET_IDENTITY, as per ANSI NCITS ATA6 rev.1b spec.
  *
@@ -699,6 +640,7 @@ struct hd_driveid {
 					 *  7:0 Signature
 					 */
 };
+#endif /* __KERNEL__ */
 
 /*
  * IDE "nice" flags. These are used on a per drive basis to determine
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 7ff5c55..1fcb712 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -19,8 +19,21 @@ static inline void flush_kernel_dcache_page(struct page *page)
 }
 #endif
 
-#ifdef CONFIG_HIGHMEM
+#include <asm/kmap_types.h>
+
+#if defined(CONFIG_DEBUG_HIGHMEM) && defined(CONFIG_TRACE_IRQFLAGS_SUPPORT)
+
+void debug_kmap_atomic(enum km_type type);
+
+#else
 
+static inline void debug_kmap_atomic(enum km_type type)
+{
+}
+
+#endif
+
+#ifdef CONFIG_HIGHMEM
 #include <asm/highmem.h>
 
 /* declarations for linux/mm/highmem.c */
@@ -44,8 +57,6 @@ static inline void *kmap(struct page *page)
 
 #define kunmap(page) do { (void) (page); } while (0)
 
-#include <asm/kmap_types.h>
-
 static inline void *kmap_atomic(struct page *page, enum km_type idx)
 {
 	pagefault_disable();
@@ -187,16 +198,4 @@ static inline void copy_highpage(struct page *to, struct page *from)
 	kunmap_atomic(vto, KM_USER1);
 }
 
-#if defined(CONFIG_DEBUG_HIGHMEM) && defined(CONFIG_TRACE_IRQFLAGS_SUPPORT)
-
-void debug_kmap_atomic(enum km_type type);
-
-#else
-
-static inline void debug_kmap_atomic(enum km_type type)
-{
-}
-
-#endif
-
 #endif /* _LINUX_HIGHMEM_H */
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
deleted file mode 100644
index 82bea14..0000000
--- a/include/linux/raid/md.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
-   md.h : Multiple Devices driver for Linux
-          Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman
-          Copyright (C) 1994-96 Marc ZYNGIER
-	  <zyngier@ufr-info-p7.ibp.fr> or
-	  <maz@gloups.fdn.fr>
-	  
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
-   any later version.
-   
-   You should have received a copy of the GNU General Public License
-   (for example /usr/src/linux/COPYING); if not, write to the Free
-   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  
-*/
-
-#ifndef _MD_H
-#define _MD_H
-
-#include <linux/blkdev.h>
-#include <linux/seq_file.h>
-
-/*
- * 'md_p.h' holds the 'physical' layout of RAID devices
- * 'md_u.h' holds the user <=> kernel API
- *
- * 'md_k.h' holds kernel internal definitions
- */
-
-#include <linux/raid/md_p.h>
-#include <linux/raid/md_u.h>
-#include <linux/raid/md_k.h>
-
-#ifdef CONFIG_MD
-
-/*
- * Different major versions are not compatible.
- * Different minor versions are only downward compatible.
- * Different patchlevel versions are downward and upward compatible.
- */
-#define MD_MAJOR_VERSION                0
-#define MD_MINOR_VERSION                90
-/*
- * MD_PATCHLEVEL_VERSION indicates kernel functionality.
- * >=1 means different superblock formats are selectable using SET_ARRAY_INFO
- *     and major_version/minor_version accordingly
- * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT
- *     in the super status byte
- * >=3 means that bitmap superblock version 4 is supported, which uses
- *     little-ending representation rather than host-endian
- */
-#define MD_PATCHLEVEL_VERSION           3
-
-extern int mdp_major;
-
-extern int register_md_personality(struct mdk_personality *p);
-extern int unregister_md_personality(struct mdk_personality *p);
-extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev),
-				mddev_t *mddev, const char *name);
-extern void md_unregister_thread(mdk_thread_t *thread);
-extern void md_wakeup_thread(mdk_thread_t *thread);
-extern void md_check_recovery(mddev_t *mddev);
-extern void md_write_start(mddev_t *mddev, struct bio *bi);
-extern void md_write_end(mddev_t *mddev);
-extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
-extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
-
-extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
-			   sector_t sector, int size, struct page *page);
-extern void md_super_wait(mddev_t *mddev);
-extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
-			struct page *page, int rw);
-extern void md_do_sync(mddev_t *mddev);
-extern void md_new_event(mddev_t *mddev);
-extern int md_allow_write(mddev_t *mddev);
-extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
-
-#endif /* CONFIG_MD */
-#endif 
-
diff --git a/include/linux/raid/md_u.h b/include/linux/raid/md_u.h
index 7192035..fb1abb3 100644
--- a/include/linux/raid/md_u.h
+++ b/include/linux/raid/md_u.h
@@ -15,6 +15,24 @@
 #ifndef _MD_U_H
 #define _MD_U_H
 
+/*
+ * Different major versions are not compatible.
+ * Different minor versions are only downward compatible.
+ * Different patchlevel versions are downward and upward compatible.
+ */
+#define MD_MAJOR_VERSION                0
+#define MD_MINOR_VERSION                90
+/*
+ * MD_PATCHLEVEL_VERSION indicates kernel functionality.
+ * >=1 means different superblock formats are selectable using SET_ARRAY_INFO
+ *     and major_version/minor_version accordingly
+ * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT
+ *     in the super status byte
+ * >=3 means that bitmap superblock version 4 is supported, which uses
+ *     little-endian representation rather than host-endian
+ */
+#define MD_PATCHLEVEL_VERSION           3
+
 /* ioctls */
 
 /* status */
@@ -46,6 +64,12 @@
 #define STOP_ARRAY_RO		_IO (MD_MAJOR, 0x33)
 #define RESTART_ARRAY_RW	_IO (MD_MAJOR, 0x34)
 
+/* 63 partitions with the alternate major number (mdp) */
+#define MdpMinorShift 6
+#ifdef __KERNEL__
+extern int mdp_major;
+#endif
+
 typedef struct mdu_version_s {
 	int major;
 	int minor;
@@ -85,6 +109,17 @@ typedef struct mdu_array_info_s {
 
 } mdu_array_info_t;
 
+/* non-obvious values for 'level' */
+#define	LEVEL_MULTIPATH		(-4)
+#define	LEVEL_LINEAR		(-1)
+#define	LEVEL_FAULTY		(-5)
+
+/* we need a value for 'no level specified' and 0
+ * means 'raid0', so we need something else.  This is
+ * for internal use only
+ */
+#define	LEVEL_NONE		(-1000000)
+
 typedef struct mdu_disk_info_s {
 	/*
 	 * configuration/status of one particular disk
diff --git a/drivers/md/raid6.h b/include/linux/raid/pq.h
index 98dcde8..d92480f 100644
--- a/drivers/md/raid6.h
+++ b/include/linux/raid/pq.h
@@ -5,7 +5,7 @@
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
  *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- *   Bostom MA 02111-1307, USA; either version 2 of the License, or
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
  *   (at your option) any later version; incorporated herein by reference.
  *
  * ----------------------------------------------------------------------- */
@@ -17,14 +17,7 @@
 
 /* Set to 1 to use kernel-wide empty_zero_page */
 #define RAID6_USE_EMPTY_ZERO_PAGE 0
-
-#include <linux/raid/md.h>
-#include <linux/raid/raid5.h>
-
-typedef raid5_conf_t raid6_conf_t; /* Same configuration */
-
-/* Additional compute_parity mode -- updates the parity w/o LOCKING */
-#define UPDATE_PARITY	4
+#include <linux/blkdev.h>
 
 /* We need a pre-zeroed page... if we don't want to use the kernel-provided
    one define it here */
@@ -68,6 +61,10 @@ extern const char raid6_empty_zero_page[PAGE_SIZE];
 #define enable_kernel_altivec()
 #define disable_kernel_altivec()
 
+#define EXPORT_SYMBOL(sym)
+#define MODULE_LICENSE(licence)
+#define subsys_initcall(x)
+#define module_exit(x)
 #endif /* __KERNEL__ */
 
 /* Routine choices */
@@ -98,9 +95,11 @@ extern const u8 raid6_gfinv[256]      __attribute__((aligned(256)));
 extern const u8 raid6_gfexi[256]      __attribute__((aligned(256)));
 
 /* Recovery routines */
-void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, void **ptrs);
+void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+		       void **ptrs);
 void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs);
-void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs);
+void raid6_dual_recov(int disks, size_t bytes, int faila, int failb,
+		      void **ptrs);
 
 /* Some definitions to allow code to be compiled for testing in userspace */
 #ifndef __KERNEL__
@@ -108,8 +107,11 @@ void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs
 # define jiffies	raid6_jiffies()
 # define printk 	printf
 # define GFP_KERNEL	0
-# define __get_free_pages(x,y)	((unsigned long)mmap(NULL, PAGE_SIZE << (y), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0))
-# define free_pages(x,y)	munmap((void *)(x), (y)*PAGE_SIZE)
+# define __get_free_pages(x, y)	((unsigned long)mmap(NULL, PAGE_SIZE << (y), \
+						     PROT_READ|PROT_WRITE,   \
+						     MAP_PRIVATE|MAP_ANONYMOUS,\
+						     0, 0))
+# define free_pages(x, y)	munmap((void *)(x), (y)*PAGE_SIZE)
 
 static inline void cpu_relax(void)
 {
diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h
index 3e12058..5a21095 100644
--- a/include/linux/raid/xor.h
+++ b/include/linux/raid/xor.h
@@ -1,8 +1,6 @@
 #ifndef _XOR_H
 #define _XOR_H
 
-#include <linux/raid/md.h>
-
 #define MAX_XOR_BLOCKS 4
 
 extern void xor_blocks(unsigned int count, unsigned int bytes,
diff --git a/include/linux/timeriomem-rng.h b/include/linux/timeriomem-rng.h
index dd25317..3e08a1c 100644
--- a/include/linux/timeriomem-rng.h
+++ b/include/linux/timeriomem-rng.h
@@ -14,7 +14,7 @@ struct timeriomem_rng_data {
 	struct completion	completion;
 	unsigned int		present:1;
 
-	u32 __iomem		*address;
+	void __iomem		*address;
 
 	/* measures in usecs */
 	unsigned int		period;
diff --git a/init/do_mounts.h b/init/do_mounts.h
index 9aa968d..f5b978a 100644
--- a/init/do_mounts.h
+++ b/init/do_mounts.h
@@ -1,4 +1,5 @@
 #include <linux/kernel.h>
+#include <linux/blkdev.h>
 #include <linux/init.h>
 #include <linux/syscalls.h>
 #include <linux/unistd.h>
diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c
index 9bdddbc..69aebbf 100644
--- a/init/do_mounts_md.c
+++ b/init/do_mounts_md.c
@@ -1,5 +1,6 @@
 #include <linux/delay.h>
-#include <linux/raid/md.h>
+#include <linux/raid/md_u.h>
+#include <linux/raid/md_p.h>
 
 #include "do_mounts.h"
 
@@ -112,8 +113,6 @@ static int __init md_setup(char *str)
 	return 1;
 }
 
-#define MdpMinorShift 6
-
 static void __init md_setup_drive(void)
 {
 	int minor, i, ent, partitioned;
diff --git a/scripts/package/buildtar b/scripts/package/buildtar
index 28574ae..b1fd48d 100644
--- a/scripts/package/buildtar
+++ b/scripts/package/buildtar
@@ -75,6 +75,10 @@ case "${ARCH}" in
 	alpha)
 		[ -f "${objtree}/arch/alpha/boot/vmlinux.gz" ] && cp -v -- "${objtree}/arch/alpha/boot/vmlinux.gz" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}"
 		;;
+	parisc*)
+		[ -f "${KBUILD_IMAGE}" ] && cp -v -- "${KBUILD_IMAGE}" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}"
+		[ -f "${objtree}/lifimage" ] && cp -v -- "${objtree}/lifimage" "${tmpdir}/boot/lifimage-${KERNELRELEASE}"
+		;;
 	vax)
 		[ -f "${objtree}/vmlinux.SYS" ] && cp -v -- "${objtree}/vmlinux.SYS" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}.SYS"
 		[ -f "${objtree}/vmlinux.dsk" ] && cp -v -- "${objtree}/vmlinux.dsk" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}.dsk"