111 files changed, 1350 insertions, 766 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 5c5bc84..f8b86e9 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -238,6 +238,13 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
 		return ERR_PTR(-ENOMEM);
 	}
 
+	rc = bdi_setup_and_register(&v9ses->bdi, "9p", BDI_CAP_MAP_COPY);
+	if (rc) {
+		__putname(v9ses->aname);
+		__putname(v9ses->uname);
+		return ERR_PTR(rc);
+	}
+
 	spin_lock(&v9fs_sessionlist_lock);
 	list_add(&v9ses->slist, &v9fs_sessionlist);
 	spin_unlock(&v9fs_sessionlist_lock);
@@ -301,6 +308,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
 	return fid;
 
 error:
+	bdi_destroy(&v9ses->bdi);
 	return ERR_PTR(retval);
 }
 
@@ -326,6 +334,8 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
 	__putname(v9ses->uname);
 	__putname(v9ses->aname);
 
+	bdi_destroy(&v9ses->bdi);
+
 	spin_lock(&v9fs_sessionlist_lock);
 	list_del(&v9ses->slist);
 	spin_unlock(&v9fs_sessionlist_lock);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index a0a8d3d..bec4d0b 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -20,6 +20,7 @@
  *  Boston, MA  02111-1301  USA
  *
  */
+#include <linux/backing-dev.h>
 
 /**
  * enum p9_session_flags - option flags for each 9P session
@@ -102,6 +103,7 @@ struct v9fs_session_info {
 	u32 uid;		/* if ACCESS_SINGLE, the uid that has access */
 	struct p9_client *clnt;	/* 9p client */
 	struct list_head slist; /* list of sessions registered with v9fs */
+	struct backing_dev_info bdi;
 };
 
 struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 491108b..806da5d 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -77,6 +77,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
 	sb->s_blocksize = 1 << sb->s_blocksize_bits;
 	sb->s_magic = V9FS_MAGIC;
 	sb->s_op = &v9fs_super_ops;
+	sb->s_bdi = &v9ses->bdi;
 
 	sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
 	    MS_NOATIME;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index c54dad4..a10f258 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -19,6 +19,7 @@
 #include <linux/workqueue.h>
 #include <linux/sched.h>
 #include <linux/fscache.h>
+#include <linux/backing-dev.h>
 
 #include "afs.h"
 #include "afs_vl.h"
@@ -313,6 +314,7 @@ struct afs_volume {
 	unsigned short		rjservers;	/* number of servers discarded due to -ENOMEDIUM */
 	struct afs_server	*servers[8];	/* servers on which volume resides (ordered) */
 	struct rw_semaphore	server_sem;	/* lock for accessing current server */
+	struct backing_dev_info	bdi;
 };
 
 /*
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 5e813a8..b3feddc 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -138,9 +138,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 {
 	struct afs_super_info *super;
 	struct vfsmount *mnt;
-	struct page *page = NULL;
+	struct page *page;
 	size_t size;
-	char *buf, *devname = NULL, *options = NULL;
+	char *buf, *devname, *options;
 	int ret;
 
 	_enter("{%s}", mntpt->d_name.name);
@@ -150,22 +150,22 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 	ret = -EINVAL;
 	size = mntpt->d_inode->i_size;
 	if (size > PAGE_SIZE - 1)
-		goto error;
+		goto error_no_devname;
 
 	ret = -ENOMEM;
 	devname = (char *) get_zeroed_page(GFP_KERNEL);
 	if (!devname)
-		goto error;
+		goto error_no_devname;
 
 	options = (char *) get_zeroed_page(GFP_KERNEL);
 	if (!options)
-		goto error;
+		goto error_no_options;
 
 	/* read the contents of the AFS special symlink */
 	page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL);
 	if (IS_ERR(page)) {
 		ret = PTR_ERR(page);
-		goto error;
+		goto error_no_page;
 	}
 
 	ret = -EIO;
@@ -196,12 +196,12 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 	return mnt;
 
 error:
-	if (page)
-		page_cache_release(page);
-	if (devname)
-		free_page((unsigned long) devname);
-	if (options)
-		free_page((unsigned long) options);
+	page_cache_release(page);
+error_no_page:
+	free_page((unsigned long) options);
+error_no_options:
+	free_page((unsigned long) devname);
+error_no_devname:
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
 }
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 14f6431..e932e5a 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -311,6 +311,7 @@ static int afs_fill_super(struct super_block *sb, void *data)
 	sb->s_magic		= AFS_FS_MAGIC;
 	sb->s_op		= &afs_super_ops;
 	sb->s_fs_info		= as;
+	sb->s_bdi		= &as->volume->bdi;
 
 	/* allocate the root inode and dentry */
 	fid.vid		= as->volume->vid;
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index a353e69..401eeb2 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -106,6 +106,10 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
 	volume->cell		= params->cell;
 	volume->vid		= vlocation->vldb.vid[params->type];
 
+	ret = bdi_setup_and_register(&volume->bdi, "afs", BDI_CAP_MAP_COPY);
+	if (ret)
+		goto error_bdi;
+
 	init_rwsem(&volume->server_sem);
 
 	/* look up all the applicable server records */
@@ -151,6 +155,8 @@ error:
 	return ERR_PTR(ret);
 
 error_discard:
+	bdi_destroy(&volume->bdi);
+error_bdi:
 	up_write(&params->cell->vl_sem);
 
 	for (loop = volume->nservers - 1; loop >= 0; loop--)
@@ -200,6 +206,7 @@ void afs_put_volume(struct afs_volume *volume)
 	for (loop = volume->nservers - 1; loop >= 0; loop--)
 		afs_put_server(volume->servers[loop]);
 
+	bdi_destroy(&volume->bdi);
 	kfree(volume);
 
 	_leave(" [destroyed]");
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 7ab23e0..2c5f9a0 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1005,15 +1005,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
 				}
 			} else if (!mm->start_data) {
 				mm->start_data = seg->addr;
-#ifndef CONFIG_MMU
 				mm->end_data = seg->addr + phdr->p_memsz;
-#endif
 			}
-
-#ifdef CONFIG_MMU
-			if (seg->addr + phdr->p_memsz > mm->end_data)
-				mm->end_data = seg->addr + phdr->p_memsz;
-#endif
 		}
 
 		seg++;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index e0e769b..49566c1 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -355,7 +355,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp)
 
 	if (!flat_reloc_valid(r, start_brk - start_data + text_len)) {
 		printk("BINFMT_FLAT: reloc outside program 0x%x (0 - 0x%x/0x%x)",
-		       (int) r,(int)(start_brk-start_code),(int)text_len);
+		       (int) r,(int)(start_brk-start_data+text_len),(int)text_len);
 		goto failed;
 	}
 
diff --git a/fs/bio.c b/fs/bio.c
index e1f9221..e7bf6ca 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -554,7 +554,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 					.bi_rw = bio->bi_rw,
 				};
 
-				if (q->merge_bvec_fn(q, &bvm, prev) < len) {
+				if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) {
 					prev->bv_len -= len;
 					return 0;
 				}
@@ -607,7 +607,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 		 * merge_bvec_fn() returns number of bytes it can accept
 		 * at this offset
 		 */
-		if (q->merge_bvec_fn(q, &bvm, bvec) < len) {
+		if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
 			bvec->bv_page = NULL;
 			bvec->bv_len = 0;
 			bvec->bv_offset = 0;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index d11d028..6dcee88 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -404,20 +404,28 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
  *	NULL first argument is nfsd_sync_dir() and that's not a directory.
  */
  
-static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
+int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync)
 {
-	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
+	struct inode *bd_inode = filp->f_mapping->host;
+	struct block_device *bdev = I_BDEV(bd_inode);
 	int error;
 
-	error = sync_blockdev(bdev);
-	if (error)
-		return error;
-	
+	/*
+	 * There is no need to serialise calls to blkdev_issue_flush with
+	 * i_mutex and doing so causes performance issues with concurrent
+	 * O_SYNC writers to a block device.
+	 */
+	mutex_unlock(&bd_inode->i_mutex);
+
 	error = blkdev_issue_flush(bdev, NULL);
 	if (error == -EOPNOTSUPP)
 		error = 0;
+
+	mutex_lock(&bd_inode->i_mutex);
+
 	return error;
 }
+EXPORT_SYMBOL(blkdev_fsync);
 
 /*
  * pseudo-fs
@@ -1481,7 +1489,7 @@ const struct file_operations def_blk_fops = {
   	.aio_read	= generic_file_aio_read,
 	.aio_write	= blkdev_aio_write,
 	.mmap		= generic_file_mmap,
-	.fsync		= block_fsync,
+	.fsync		= blkdev_fsync,
 	.unlocked_ioctl	= block_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= compat_blkdev_ioctl,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e7b8f2c..feca041 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -44,8 +44,6 @@ static struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
 static void free_fs_root(struct btrfs_root *root);
 
-static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);
-
 /*
  * end_io_wq structs are used to do processing in task context when an IO is
  * complete.  This is used during reads to verify checksums, and it is used
@@ -1375,19 +1373,11 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
 {
 	int err;
 
-	bdi->name = "btrfs";
 	bdi->capabilities = BDI_CAP_MAP_COPY;
-	err = bdi_init(bdi);
+	err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY);
 	if (err)
 		return err;
 
-	err = bdi_register(bdi, NULL, "btrfs-%d",
-				atomic_inc_return(&btrfs_bdi_num));
-	if (err) {
-		bdi_destroy(bdi);
-		return err;
-	}
-
 	bdi->ra_pages	= default_backing_dev_info.ra_pages;
 	bdi->unplug_io_fn	= btrfs_unplug_io_fn;
 	bdi->unplug_io_data	= info;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9e23ffe..b34d32f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3235,7 +3235,8 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
 				u64 bytes)
 {
 	struct btrfs_space_info *data_sinfo;
-	int ret = 0, committed = 0;
+	u64 used;
+	int ret = 0, committed = 0, flushed = 0;
 
 	/* make sure bytes are sectorsize aligned */
 	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
@@ -3247,12 +3248,21 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
 again:
 	/* make sure we have enough space to handle the data first */
 	spin_lock(&data_sinfo->lock);
-	if (data_sinfo->total_bytes - data_sinfo->bytes_used -
-	    data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
-	    data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
-	    data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) {
+	used = data_sinfo->bytes_used + data_sinfo->bytes_delalloc +
+		data_sinfo->bytes_reserved + data_sinfo->bytes_pinned +
+		data_sinfo->bytes_readonly + data_sinfo->bytes_may_use +
+		data_sinfo->bytes_super;
+
+	if (used + bytes > data_sinfo->total_bytes) {
 		struct btrfs_trans_handle *trans;
 
+		if (!flushed) {
+			spin_unlock(&data_sinfo->lock);
+			flush_delalloc(root, data_sinfo);
+			flushed = 1;
+			goto again;
+		}
+
 		/*
 		 * if we don't have enough free bytes in this space then we need
 		 * to alloc a new chunk.
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index aa7dc36..8db7b14 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2250,6 +2250,12 @@ again:
 	if (!looped)
 		calc_size = max_t(u64, min_stripe_size, calc_size);
 
+	/*
+	 * we're about to do_div by the stripe_len so lets make sure
+	 * we end up with something bigger than a stripe
+	 */
+	calc_size = max_t(u64, calc_size, stripe_len * 4);
+
 	do_div(calc_size, stripe_len);
 	calc_size *= stripe_len;
 
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index aa3cd7c..4b42c2b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -337,16 +337,15 @@ out:
 /*
  * Get ref for the oldest snapc for an inode with dirty data... that is, the
  * only snap context we are allowed to write back.
- *
- * Caller holds i_lock.
  */
-static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
-						      u64 *snap_size)
+static struct ceph_snap_context *get_oldest_context(struct inode *inode,
+						    u64 *snap_size)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_snap_context *snapc = NULL;
 	struct ceph_cap_snap *capsnap = NULL;
 
+	spin_lock(&inode->i_lock);
 	list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
 		dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
 		     capsnap->context, capsnap->dirty_pages);
@@ -357,21 +356,11 @@ static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
 			break;
 		}
 	}
-	if (!snapc && ci->i_snap_realm) {
-		snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
+	if (!snapc && ci->i_head_snapc) {
+		snapc = ceph_get_snap_context(ci->i_head_snapc);
 		dout(" head snapc %p has %d dirty pages\n",
 		     snapc, ci->i_wrbuffer_ref_head);
 	}
-	return snapc;
-}
-
-static struct ceph_snap_context *get_oldest_context(struct inode *inode,
-						    u64 *snap_size)
-{
-	struct ceph_snap_context *snapc = NULL;
-
-	spin_lock(&inode->i_lock);
-	snapc = __get_oldest_context(inode, snap_size);
 	spin_unlock(&inode->i_lock);
 	return snapc;
 }
@@ -392,7 +381,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	int len = PAGE_CACHE_SIZE;
 	loff_t i_size;
 	int err = 0;
-	struct ceph_snap_context *snapc;
+	struct ceph_snap_context *snapc, *oldest;
 	u64 snap_size = 0;
 	long writeback_stat;
 
@@ -413,13 +402,16 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 		dout("writepage %p page %p not dirty?\n", inode, page);
 		goto out;
 	}
-	if (snapc != get_oldest_context(inode, &snap_size)) {
+	oldest = get_oldest_context(inode, &snap_size);
+	if (snapc->seq > oldest->seq) {
 		dout("writepage %p page %p snapc %p not writeable - noop\n",
 		     inode, page, (void *)page->private);
 		/* we should only noop if called by kswapd */
 		WARN_ON((current->flags & PF_MEMALLOC) == 0);
+		ceph_put_snap_context(oldest);
 		goto out;
 	}
+	ceph_put_snap_context(oldest);
 
 	/* is this a partial page at end of file? */
 	if (snap_size)
@@ -458,7 +450,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	ClearPagePrivate(page);
 	end_page_writeback(page);
 	ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
-	ceph_put_snap_context(snapc);
+	ceph_put_snap_context(snapc);  /* page's reference */
 out:
 	return err;
 }
@@ -517,7 +509,7 @@ static void writepages_finish(struct ceph_osd_request *req,
 	u64 bytes = 0;
 	struct ceph_client *client = ceph_inode_to_client(inode);
 	long writeback_stat;
-	unsigned issued = __ceph_caps_issued(ci, NULL);
+	unsigned issued = ceph_caps_issued(ci);
 
 	/* parse reply */
 	replyhead = msg->front.iov_base;
@@ -558,9 +550,9 @@ static void writepages_finish(struct ceph_osd_request *req,
 			dout("inode %p skipping page %p\n", inode, page);
 			wbc->pages_skipped++;
 		}
+		ceph_put_snap_context((void *)page->private);
 		page->private = 0;
 		ClearPagePrivate(page);
-		ceph_put_snap_context(snapc);
 		dout("unlocking %d %p\n", i, page);
 		end_page_writeback(page);
 
@@ -618,7 +610,7 @@ static int ceph_writepages_start(struct address_space *mapping,
 	int range_whole = 0;
 	int should_loop = 1;
 	pgoff_t max_pages = 0, max_pages_ever = 0;
-	struct ceph_snap_context *snapc = NULL, *last_snapc = NULL;
+	struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
 	struct pagevec pvec;
 	int done = 0;
 	int rc = 0;
@@ -770,9 +762,10 @@ get_more_pages:
 			}
 
 			/* only if matching snap context */
-			if (snapc != (void *)page->private) {
-				dout("page snapc %p != oldest %p\n",
-				     (void *)page->private, snapc);
+			pgsnapc = (void *)page->private;
+			if (pgsnapc->seq > snapc->seq) {
+				dout("page snapc %p %lld > oldest %p %lld\n",
+				     pgsnapc, pgsnapc->seq, snapc, snapc->seq);
 				unlock_page(page);
 				if (!locked_pages)
 					continue; /* keep looking for snap */
@@ -914,7 +907,10 @@ static int context_is_writeable_or_written(struct inode *inode,
 					   struct ceph_snap_context *snapc)
 {
 	struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
-	return !oldest || snapc->seq <= oldest->seq;
+	int ret = !oldest || snapc->seq <= oldest->seq;
+
+	ceph_put_snap_context(oldest);
+	return ret;
 }
 
 /*
@@ -936,8 +932,8 @@ static int ceph_update_writeable_page(struct file *file,
 	int pos_in_page = pos & ~PAGE_CACHE_MASK;
 	int end_in_page = pos_in_page + len;
 	loff_t i_size;
-	struct ceph_snap_context *snapc;
 	int r;
+	struct ceph_snap_context *snapc, *oldest;
 
 retry_locked:
 	/* writepages currently holds page lock, but if we change that later, */
@@ -947,23 +943,24 @@ retry_locked:
 	BUG_ON(!ci->i_snap_realm);
 	down_read(&mdsc->snap_rwsem);
 	BUG_ON(!ci->i_snap_realm->cached_context);
-	if (page->private &&
-	    (void *)page->private != ci->i_snap_realm->cached_context) {
+	snapc = (void *)page->private;
+	if (snapc && snapc != ci->i_head_snapc) {
 		/*
 		 * this page is already dirty in another (older) snap
 		 * context!  is it writeable now?
 		 */
-		snapc = get_oldest_context(inode, NULL);
+		oldest = get_oldest_context(inode, NULL);
 		up_read(&mdsc->snap_rwsem);
 
-		if (snapc != (void *)page->private) {
+		if (snapc->seq > oldest->seq) {
+			ceph_put_snap_context(oldest);
 			dout(" page %p snapc %p not current or oldest\n",
-			     page, (void *)page->private);
+			     page, snapc);
 			/*
 			 * queue for writeback, and wait for snapc to
 			 * be writeable or written
 			 */
-			snapc = ceph_get_snap_context((void *)page->private);
+			snapc = ceph_get_snap_context(snapc);
 			unlock_page(page);
 			ceph_queue_writeback(inode);
 			r = wait_event_interruptible(ci->i_cap_wq,
@@ -973,6 +970,7 @@ retry_locked:
 				return r;
 			return -EAGAIN;
 		}
+		ceph_put_snap_context(oldest);
 
 		/* yay, writeable, do it now (without dropping page lock) */
 		dout(" page %p snapc %p not current, but oldest\n",
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c
index f6394b9..818afe7 100644
--- a/fs/ceph/auth.c
+++ b/fs/ceph/auth.c
@@ -3,6 +3,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/err.h>
+#include <linux/slab.h>
 
 #include "types.h"
 #include "auth_none.h"
diff --git a/fs/ceph/auth_none.h b/fs/ceph/auth_none.h
index 56c0553..8164df1 100644
--- a/fs/ceph/auth_none.h
+++ b/fs/ceph/auth_none.h
@@ -1,6 +1,8 @@
 #ifndef _FS_CEPH_AUTH_NONE_H
 #define _FS_CEPH_AUTH_NONE_H
 
+#include <linux/slab.h>
+
 #include "auth.h"
 
 /*
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index d9001a4..fee5a08d 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -12,8 +12,6 @@
 #include "auth.h"
 #include "decode.h"
 
-struct kmem_cache *ceph_x_ticketbuf_cachep;
-
 #define TEMP_TICKET_BUF_LEN	256
 
 static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
@@ -131,13 +129,12 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 	char *ticket_buf;
 	u8 struct_v;
 
-	dbuf = kmem_cache_alloc(ceph_x_ticketbuf_cachep, GFP_NOFS | GFP_ATOMIC);
+	dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
 	if (!dbuf)
 		return -ENOMEM;
 
 	ret = -ENOMEM;
-	ticket_buf = kmem_cache_alloc(ceph_x_ticketbuf_cachep,
-				      GFP_NOFS | GFP_ATOMIC);
+	ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
 	if (!ticket_buf)
 		goto out_dbuf;
 
@@ -251,9 +248,9 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 
 	ret = 0;
 out:
-	kmem_cache_free(ceph_x_ticketbuf_cachep, ticket_buf);
+	kfree(ticket_buf);
 out_dbuf:
-	kmem_cache_free(ceph_x_ticketbuf_cachep, dbuf);
+	kfree(dbuf);
 	return ret;
 
 bad:
@@ -605,8 +602,6 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
 		remove_ticket_handler(ac, th);
 	}
 
-	kmem_cache_destroy(ceph_x_ticketbuf_cachep);
-
 	kfree(ac->private);
 	ac->private = NULL;
 }
@@ -641,26 +636,20 @@ int ceph_x_init(struct ceph_auth_client *ac)
 	int ret;
 
 	dout("ceph_x_init %p\n", ac);
+	ret = -ENOMEM;
 	xi = kzalloc(sizeof(*xi), GFP_NOFS);
 	if (!xi)
-		return -ENOMEM;
+		goto out;
 
-	ret = -ENOMEM;
-	ceph_x_ticketbuf_cachep = kmem_cache_create("ceph_x_ticketbuf",
-				      TEMP_TICKET_BUF_LEN, 8,
-				      (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
-				      NULL);
-	if (!ceph_x_ticketbuf_cachep)
-		goto done_nomem;
 	ret = -EINVAL;
 	if (!ac->secret) {
 		pr_err("no secret set (for auth_x protocol)\n");
-		goto done_nomem;
+		goto out_nomem;
 	}
 
 	ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret);
 	if (ret)
-		goto done_nomem;
+		goto out_nomem;
 
 	xi->starting = true;
 	xi->ticket_handlers = RB_ROOT;
@@ -670,10 +659,9 @@ int ceph_x_init(struct ceph_auth_client *ac)
 	ac->ops = &ceph_x_ops;
 	return 0;
 
-done_nomem:
+out_nomem:
 	kfree(xi);
-	if (ceph_x_ticketbuf_cachep)
-		kmem_cache_destroy(ceph_x_ticketbuf_cachep);
+out:
 	return ret;
 }
 
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 3710e07..0c16818 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1205,6 +1205,12 @@ retry:
 		if (capsnap->dirty_pages || capsnap->writing)
 			continue;
 
+		/*
+		 * if cap writeback already occurred, we should have dropped
+		 * the capsnap in ceph_put_wrbuffer_cap_refs.
+		 */
+		BUG_ON(capsnap->dirty == 0);
+
 		/* pick mds, take s_mutex */
 		mds = __ceph_get_cap_mds(ci, &mseq);
 		if (session && session->s_mds != mds) {
@@ -1855,8 +1861,8 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
 		} else {
 			pr_err("%p auth cap %p not mds%d ???\n", inode,
 			       cap, session->s_mds);
-			spin_unlock(&inode->i_lock);
 		}
+		spin_unlock(&inode->i_lock);
 	}
 }
 
@@ -2118,8 +2124,8 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
 		}
 	spin_unlock(&inode->i_lock);
 
-	dout("put_cap_refs %p had %s %s\n", inode, ceph_cap_string(had),
-	     last ? "last" : "");
+	dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
+	     last ? " last" : "", put ? " put" : "");
 
 	if (last && !flushsnaps)
 		ceph_check_caps(ci, 0, NULL);
@@ -2143,7 +2149,8 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 {
 	struct inode *inode = &ci->vfs_inode;
 	int last = 0;
-	int last_snap = 0;
+	int complete_capsnap = 0;
+	int drop_capsnap = 0;
 	int found = 0;
 	struct ceph_cap_snap *capsnap = NULL;
 
@@ -2166,19 +2173,32 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 		list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
 			if (capsnap->context == snapc) {
 				found = 1;
-				capsnap->dirty_pages -= nr;
-				last_snap = !capsnap->dirty_pages;
 				break;
 			}
 		}
 		BUG_ON(!found);
+		capsnap->dirty_pages -= nr;
+		if (capsnap->dirty_pages == 0) {
+			complete_capsnap = 1;
+			if (capsnap->dirty == 0)
+				/* cap writeback completed before we created
+				 * the cap_snap; no FLUSHSNAP is needed */
+				drop_capsnap = 1;
+		}
 		dout("put_wrbuffer_cap_refs on %p cap_snap %p "
-		     " snap %lld %d/%d -> %d/%d %s%s\n",
+		     " snap %lld %d/%d -> %d/%d %s%s%s\n",
 		     inode, capsnap, capsnap->context->seq,
 		     ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr,
 		     ci->i_wrbuffer_ref, capsnap->dirty_pages,
 		     last ? " (wrbuffer last)" : "",
-		     last_snap ? " (capsnap last)" : "");
+		     complete_capsnap ? " (complete capsnap)" : "",
+		     drop_capsnap ? " (drop capsnap)" : "");
+		if (drop_capsnap) {
+			ceph_put_snap_context(capsnap->context);
+			list_del(&capsnap->ci_item);
+			list_del(&capsnap->flushing_item);
+			ceph_put_cap_snap(capsnap);
+		}
 	}
 
 	spin_unlock(&inode->i_lock);
@@ -2186,10 +2206,12 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 	if (last) {
 		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
 		iput(inode);
-	} else if (last_snap) {
+	} else if (complete_capsnap) {
 		ceph_flush_snaps(ci);
 		wake_up(&ci->i_cap_wq);
 	}
+	if (drop_capsnap)
+		iput(inode);
 }
 
 /*
@@ -2465,8 +2487,8 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
 				break;
 			}
 			WARN_ON(capsnap->dirty_pages || capsnap->writing);
-			dout(" removing cap_snap %p follows %lld\n",
-			     capsnap, follows);
+			dout(" removing %p cap_snap %p follows %lld\n",
+			     inode, capsnap, follows);
 			ceph_put_snap_context(capsnap->context);
 			list_del(&capsnap->ci_item);
 			list_del(&capsnap->flushing_item);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 7261dc6..650d2db 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -171,11 +171,11 @@ more:
 	spin_lock(&inode->i_lock);
 	spin_lock(&dcache_lock);
 
+	last = dentry;
+
 	if (err < 0)
 		goto out_unlock;
 
-	last = dentry;
-
 	p = p->prev;
 	filp->f_pos++;
 
@@ -312,7 +312,7 @@ more:
 		req->r_readdir_offset = fi->next_offset;
 		req->r_args.readdir.frag = cpu_to_le32(frag);
 		req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
-		req->r_num_caps = max_entries;
+		req->r_num_caps = max_entries + 1;
 		err = ceph_mdsc_do_request(mdsc, NULL, req);
 		if (err < 0) {
 			ceph_mdsc_put_request(req);
@@ -489,6 +489,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
 		struct inode *inode = ceph_get_snapdir(parent);
 		dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
 		     dentry, dentry->d_name.len, dentry->d_name.name, inode);
+		BUG_ON(!d_unhashed(dentry));
 		d_add(dentry, inode);
 		err = 0;
 	}
@@ -879,7 +880,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 		 * do_request, above).  If there is no trace, we need
 		 * to do it here.
 		 */
+
+		/* d_move screws up d_subdirs order */
+		ceph_i_clear(new_dir, CEPH_I_COMPLETE);
+
 		d_move(old_dentry, new_dentry);
+
+		/* ensure target dentry is invalidated, despite
+		   rehashing bug in vfs_rename_dir */
+		new_dentry->d_time = jiffies;
+		ceph_dentry(new_dentry)->lease_shared_gen = 0;
 	}
 	ceph_mdsc_put_request(req);
 	return err;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4add3d5..ed6f197 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -665,7 +665,8 @@ more:
 		 * throw out any page cache pages in this range. this
 		 * may block.
 		 */
-		truncate_inode_pages_range(inode->i_mapping, pos, pos+len);
+		truncate_inode_pages_range(inode->i_mapping, pos, 
+					   (pos+len) | (PAGE_CACHE_SIZE-1));
 	} else {
 		pages = alloc_page_vector(num_pages);
 		if (IS_ERR(pages)) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index aca82d5..261f3e6 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -886,6 +886,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 	struct inode *in = NULL;
 	struct ceph_mds_reply_inode *ininfo;
 	struct ceph_vino vino;
+	struct ceph_client *client = ceph_sb_to_client(sb);
 	int i = 0;
 	int err = 0;
 
@@ -949,7 +950,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 			return err;
 	}
 
-	if (rinfo->head->is_dentry && !req->r_aborted) {
+	/*
+	 * ignore null lease/binding on snapdir ENOENT, or else we
+	 * will have trouble splicing in the virtual snapdir later
+	 */
+	if (rinfo->head->is_dentry && !req->r_aborted &&
+	    (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
+					       client->mount_args->snapdir_name,
+					       req->r_dentry->d_name.len))) {
 		/*
 		 * lookup link rename   : null -> possibly existing inode
 		 * mknod symlink mkdir  : null -> new inode
@@ -989,6 +997,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 			     dn, dn->d_name.len, dn->d_name.name);
 			dout("fill_trace doing d_move %p -> %p\n",
 			     req->r_old_dentry, dn);
+
+			/* d_move screws up d_subdirs order */
+			ceph_i_clear(dir, CEPH_I_COMPLETE);
+
 			d_move(req->r_old_dentry, dn);
 			dout(" src %p '%.*s' dst %p '%.*s'\n",
 			     req->r_old_dentry,
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 8f1715f..509f57d 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -30,6 +30,10 @@ static char tag_msg = CEPH_MSGR_TAG_MSG;
 static char tag_ack = CEPH_MSGR_TAG_ACK;
 static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;
 
+#ifdef CONFIG_LOCKDEP
+static struct lock_class_key socket_class;
+#endif
+
 
 static void queue_con(struct ceph_connection *con);
 static void con_work(struct work_struct *);
@@ -228,6 +232,10 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con)
 	con->sock = sock;
 	sock->sk->sk_allocation = GFP_NOFS;
 
+#ifdef CONFIG_LOCKDEP
+	lockdep_set_class(&sock->sk->sk_lock, &socket_class);
+#endif
+
 	set_sock_callbacks(sock, con);
 
 	dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
@@ -333,6 +341,7 @@ static void reset_connection(struct ceph_connection *con)
 		con->out_msg = NULL;
 	}
 	con->in_seq = 0;
+	con->in_seq_acked = 0;
 }
 
 /*
@@ -1325,6 +1334,7 @@ static int read_partial_message(struct ceph_connection *con)
 	unsigned front_len, middle_len, data_len, data_off;
 	int datacrc = con->msgr->nocrc;
 	int skip;
+	u64 seq;
 
 	dout("read_partial_message con %p msg %p\n", con, m);
 
@@ -1359,6 +1369,25 @@ static int read_partial_message(struct ceph_connection *con)
 		return -EIO;
 	data_off = le16_to_cpu(con->in_hdr.data_off);
 
+	/* verify seq# */
+	seq = le64_to_cpu(con->in_hdr.seq);
+	if ((s64)seq - (s64)con->in_seq < 1) {
+		pr_info("skipping %s%lld %s seq %lld, expected %lld\n",
+			ENTITY_NAME(con->peer_name),
+			pr_addr(&con->peer_addr.in_addr),
+			seq, con->in_seq + 1);
+		con->in_base_pos = -front_len - middle_len - data_len -
+			sizeof(m->footer);
+		con->in_tag = CEPH_MSGR_TAG_READY;
+		con->in_seq++;
+		return 0;
+	} else if ((s64)seq - (s64)con->in_seq > 1) {
+		pr_err("read_partial_message bad seq %lld expected %lld\n",
+		       seq, con->in_seq + 1);
+		con->error_msg = "bad message sequence # for incoming message";
+		return -EBADMSG;
+	}
+
 	/* allocate message? */
 	if (!con->in_msg) {
 		dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
@@ -1370,6 +1399,7 @@ static int read_partial_message(struct ceph_connection *con)
 			con->in_base_pos = -front_len - middle_len - data_len -
 				sizeof(m->footer);
 			con->in_tag = CEPH_MSGR_TAG_READY;
+			con->in_seq++;
 			return 0;
 		}
 		if (IS_ERR(con->in_msg)) {
@@ -2021,6 +2051,7 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
 		ceph_msg_put(con->in_msg);
 		con->in_msg = NULL;
 		con->in_tag = CEPH_MSGR_TAG_READY;
+		con->in_seq++;
 	} else {
 		dout("con_revoke_pages %p msg %p pages %p no-op\n",
 		     con, con->in_msg, msg);
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index 21c6623..2e2c15e 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -314,71 +314,6 @@ bad:
 	return ERR_PTR(err);
 }
 
-
-/*
- * osd map
- */
-void ceph_osdmap_destroy(struct ceph_osdmap *map)
-{
-	dout("osdmap_destroy %p\n", map);
-	if (map->crush)
-		crush_destroy(map->crush);
-	while (!RB_EMPTY_ROOT(&map->pg_temp)) {
-		struct ceph_pg_mapping *pg =
-			rb_entry(rb_first(&map->pg_temp),
-				 struct ceph_pg_mapping, node);
-		rb_erase(&pg->node, &map->pg_temp);
-		kfree(pg);
-	}
-	while (!RB_EMPTY_ROOT(&map->pg_pools)) {
-		struct ceph_pg_pool_info *pi =
-			rb_entry(rb_first(&map->pg_pools),
-				 struct ceph_pg_pool_info, node);
-		rb_erase(&pi->node, &map->pg_pools);
-		kfree(pi);
-	}
-	kfree(map->osd_state);
-	kfree(map->osd_weight);
-	kfree(map->osd_addr);
-	kfree(map);
-}
-
-/*
- * adjust max osd value.  reallocate arrays.
- */
-static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
-{
-	u8 *state;
-	struct ceph_entity_addr *addr;
-	u32 *weight;
-
-	state = kcalloc(max, sizeof(*state), GFP_NOFS);
-	addr = kcalloc(max, sizeof(*addr), GFP_NOFS);
-	weight = kcalloc(max, sizeof(*weight), GFP_NOFS);
-	if (state == NULL || addr == NULL || weight == NULL) {
-		kfree(state);
-		kfree(addr);
-		kfree(weight);
-		return -ENOMEM;
-	}
-
-	/* copy old? */
-	if (map->osd_state) {
-		memcpy(state, map->osd_state, map->max_osd*sizeof(*state));
-		memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr));
-		memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight));
-		kfree(map->osd_state);
-		kfree(map->osd_addr);
-		kfree(map->osd_weight);
-	}
-
-	map->osd_state = state;
-	map->osd_weight = weight;
-	map->osd_addr = addr;
-	map->max_osd = max;
-	return 0;
-}
-
 /*
  * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
  * to a set of osds)
@@ -482,6 +417,13 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
 	return NULL;
 }
 
+static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
+{
+	rb_erase(&pi->node, root);
+	kfree(pi->name);
+	kfree(pi);
+}
+
 void __decode_pool(void **p, struct ceph_pg_pool_info *pi)
 {
 	ceph_decode_copy(p, &pi->v, sizeof(pi->v));
@@ -490,6 +432,98 @@ void __decode_pool(void **p, struct ceph_pg_pool_info *pi)
 	*p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2;
 }
 
+static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
+{
+	struct ceph_pg_pool_info *pi;
+	u32 num, len, pool;
+
+	ceph_decode_32_safe(p, end, num, bad);
+	dout(" %d pool names\n", num);
+	while (num--) {
+		ceph_decode_32_safe(p, end, pool, bad);
+		ceph_decode_32_safe(p, end, len, bad);
+		dout("  pool %d len %d\n", pool, len);
+		pi = __lookup_pg_pool(&map->pg_pools, pool);
+		if (pi) {
+			kfree(pi->name);
+			pi->name = kmalloc(len + 1, GFP_NOFS);
+			if (pi->name) {
+				memcpy(pi->name, *p, len);
+				pi->name[len] = '\0';
+				dout("  name is %s\n", pi->name);
+			}
+		}
+		*p += len;
+	}
+	return 0;
+
+bad:
+	return -EINVAL;
+}
+
+/*
+ * osd map
+ */
+void ceph_osdmap_destroy(struct ceph_osdmap *map)
+{
+	dout("osdmap_destroy %p\n", map);
+	if (map->crush)
+		crush_destroy(map->crush);
+	while (!RB_EMPTY_ROOT(&map->pg_temp)) {
+		struct ceph_pg_mapping *pg =
+			rb_entry(rb_first(&map->pg_temp),
+				 struct ceph_pg_mapping, node);
+		rb_erase(&pg->node, &map->pg_temp);
+		kfree(pg);
+	}
+	while (!RB_EMPTY_ROOT(&map->pg_pools)) {
+		struct ceph_pg_pool_info *pi =
+			rb_entry(rb_first(&map->pg_pools),
+				 struct ceph_pg_pool_info, node);
+		__remove_pg_pool(&map->pg_pools, pi);
+	}
+	kfree(map->osd_state);
+	kfree(map->osd_weight);
+	kfree(map->osd_addr);
+	kfree(map);
+}
+
+/*
+ * adjust max osd value.  reallocate arrays.
+ */
+static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
+{
+	u8 *state;
+	struct ceph_entity_addr *addr;
+	u32 *weight;
+
+	state = kcalloc(max, sizeof(*state), GFP_NOFS);
+	addr = kcalloc(max, sizeof(*addr), GFP_NOFS);
+	weight = kcalloc(max, sizeof(*weight), GFP_NOFS);
+	if (state == NULL || addr == NULL || weight == NULL) {
+		kfree(state);
+		kfree(addr);
+		kfree(weight);
+		return -ENOMEM;
+	}
+
+	/* copy old? */
+	if (map->osd_state) {
+		memcpy(state, map->osd_state, map->max_osd*sizeof(*state));
+		memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr));
+		memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight));
+		kfree(map->osd_state);
+		kfree(map->osd_addr);
+		kfree(map->osd_weight);
+	}
+
+	map->osd_state = state;
+	map->osd_weight = weight;
+	map->osd_addr = addr;
+	map->max_osd = max;
+	return 0;
+}
+
 /*
  * decode a full map.
  */
@@ -526,7 +560,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
 	ceph_decode_32_safe(p, end, max, bad);
 	while (max--) {
 		ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad);
-		pi = kmalloc(sizeof(*pi), GFP_NOFS);
+		pi = kzalloc(sizeof(*pi), GFP_NOFS);
 		if (!pi)
 			goto bad;
 		pi->id = ceph_decode_32(p);
@@ -539,6 +573,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
 		__decode_pool(p, pi);
 		__insert_pg_pool(&map->pg_pools, pi);
 	}
+
+	if (version >= 5 && __decode_pool_names(p, end, map) < 0)
+		goto bad;
+
 	ceph_decode_32_safe(p, end, map->pool_max, bad);
 
 	ceph_decode_32_safe(p, end, map->flags, bad);
@@ -712,7 +750,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 		}
 		pi = __lookup_pg_pool(&map->pg_pools, pool);
 		if (!pi) {
-			pi = kmalloc(sizeof(*pi), GFP_NOFS);
+			pi = kzalloc(sizeof(*pi), GFP_NOFS);
 			if (!pi) {
 				err = -ENOMEM;
 				goto bad;
@@ -722,6 +760,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 		}
 		__decode_pool(p, pi);
 	}
+	if (version >= 5 && __decode_pool_names(p, end, map) < 0)
+		goto bad;
 
 	/* old_pool */
 	ceph_decode_32_safe(p, end, len, bad);
@@ -730,10 +770,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 
 		ceph_decode_32_safe(p, end, pool, bad);
 		pi = __lookup_pg_pool(&map->pg_pools, pool);
-		if (pi) {
-			rb_erase(&pi->node, &map->pg_pools);
-			kfree(pi);
-		}
+		if (pi)
+			__remove_pg_pool(&map->pg_pools, pi);
 	}
 
 	/* new_up */
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h
index 1fb55af..8bc9f1e 100644
--- a/fs/ceph/osdmap.h
+++ b/fs/ceph/osdmap.h
@@ -23,6 +23,7 @@ struct ceph_pg_pool_info {
 	int id;
 	struct ceph_pg_pool v;
 	int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask;
+	char *name;
 };
 
 struct ceph_pg_mapping {
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h
index 26ac8b8..a1fc1d0 100644
--- a/fs/ceph/rados.h
+++ b/fs/ceph/rados.h
@@ -11,8 +11,10 @@
 /*
  * osdmap encoding versions
  */
-#define CEPH_OSDMAP_INC_VERSION 4
-#define CEPH_OSDMAP_VERSION     4
+#define CEPH_OSDMAP_INC_VERSION     5
+#define CEPH_OSDMAP_INC_VERSION_EXT 5
+#define CEPH_OSDMAP_VERSION         5
+#define CEPH_OSDMAP_VERSION_EXT     5
 
 /*
  * fs id
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index e6f9bc5..d5114db 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -431,8 +431,7 @@ static int dup_array(u64 **dst, __le64 *src, int num)
  * Caller must hold snap_rwsem for read (i.e., the realm topology won't
  * change).
  */
-void ceph_queue_cap_snap(struct ceph_inode_info *ci,
-			 struct ceph_snap_context *snapc)
+void ceph_queue_cap_snap(struct ceph_inode_info *ci)
 {
 	struct inode *inode = &ci->vfs_inode;
 	struct ceph_cap_snap *capsnap;
@@ -451,10 +450,11 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
 		   as no new writes are allowed to start when pending, so any
 		   writes in progress now were started before the previous
 		   cap_snap.  lucky us. */
-		dout("queue_cap_snap %p snapc %p seq %llu used %d"
-		     " already pending\n", inode, snapc, snapc->seq, used);
+		dout("queue_cap_snap %p already pending\n", inode);
 		kfree(capsnap);
 	} else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR)) {
+		struct ceph_snap_context *snapc = ci->i_head_snapc;
+
 		igrab(inode);
 
 		atomic_set(&capsnap->nref, 1);
@@ -463,7 +463,6 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
 		INIT_LIST_HEAD(&capsnap->flushing_item);
 
 		capsnap->follows = snapc->seq - 1;
-		capsnap->context = ceph_get_snap_context(snapc);
 		capsnap->issued = __ceph_caps_issued(ci, NULL);
 		capsnap->dirty = __ceph_caps_dirty(ci);
 
@@ -480,7 +479,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
 		   snapshot. */
 		capsnap->dirty_pages = ci->i_wrbuffer_ref_head;
 		ci->i_wrbuffer_ref_head = 0;
-		ceph_put_snap_context(ci->i_head_snapc);
+		capsnap->context = snapc;
 		ci->i_head_snapc = NULL;
 		list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
 
@@ -522,15 +521,17 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
 	capsnap->ctime = inode->i_ctime;
 	capsnap->time_warp_seq = ci->i_time_warp_seq;
 	if (capsnap->dirty_pages) {
-		dout("finish_cap_snap %p cap_snap %p snapc %p %llu s=%llu "
+		dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu "
 		     "still has %d dirty pages\n", inode, capsnap,
 		     capsnap->context, capsnap->context->seq,
-		     capsnap->size, capsnap->dirty_pages);
+		     ceph_cap_string(capsnap->dirty), capsnap->size,
+		     capsnap->dirty_pages);
 		return 0;
 	}
-	dout("finish_cap_snap %p cap_snap %p snapc %p %llu s=%llu clean\n",
+	dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu\n",
 	     inode, capsnap, capsnap->context,
-	     capsnap->context->seq, capsnap->size);
+	     capsnap->context->seq, ceph_cap_string(capsnap->dirty),
+	     capsnap->size);
 
 	spin_lock(&mdsc->snap_flush_lock);
 	list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
@@ -602,7 +603,7 @@ more:
 				if (lastinode)
 					iput(lastinode);
 				lastinode = inode;
-				ceph_queue_cap_snap(ci, realm->cached_context);
+				ceph_queue_cap_snap(ci);
 				spin_lock(&realm->inodes_with_caps_lock);
 			}
 			spin_unlock(&realm->inodes_with_caps_lock);
@@ -824,8 +825,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 			spin_unlock(&realm->inodes_with_caps_lock);
 			spin_unlock(&inode->i_lock);
 
-			ceph_queue_cap_snap(ci,
-					    ci->i_snap_realm->cached_context);
+			ceph_queue_cap_snap(ci);
 
 			iput(inode);
 			continue;
@@ -869,16 +869,20 @@ skip_inode:
 				continue;
 			ci = ceph_inode(inode);
 			spin_lock(&inode->i_lock);
-			if (!ci->i_snap_realm)
-				goto split_skip_inode;
-			ceph_put_snap_realm(mdsc, ci->i_snap_realm);
-			spin_lock(&realm->inodes_with_caps_lock);
-			list_add(&ci->i_snap_realm_item,
-				 &realm->inodes_with_caps);
-			ci->i_snap_realm = realm;
-			spin_unlock(&realm->inodes_with_caps_lock);
-			ceph_get_snap_realm(mdsc, realm);
-split_skip_inode:
+			if (list_empty(&ci->i_snap_realm_item)) {
+				struct ceph_snap_realm *oldrealm =
+					ci->i_snap_realm;
+
+				dout(" moving %p to split realm %llx %p\n",
+				     inode, realm->ino, realm);
+				spin_lock(&realm->inodes_with_caps_lock);
+				list_add(&ci->i_snap_realm_item,
+					 &realm->inodes_with_caps);
+				ci->i_snap_realm = realm;
+				spin_unlock(&realm->inodes_with_caps_lock);
+				ceph_get_snap_realm(mdsc, realm);
+				ceph_put_snap_realm(mdsc, oldrealm);
+			}
 			spin_unlock(&inode->i_lock);
 			iput(inode);
 		}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 75d02ea..f888cf4 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -996,9 +996,10 @@ static int __init init_ceph(void)
 	if (ret)
 		goto out_icache;
 
-	pr_info("loaded %d.%d.%d (mon/mds/osd proto %d/%d/%d)\n",
-		CEPH_VERSION_MAJOR, CEPH_VERSION_MINOR, CEPH_VERSION_PATCH,
-		CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL);
+	pr_info("loaded (mon/mds/osd proto %d/%d/%d, osdmap %d/%d %d/%d)\n",
+		CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL,
+		CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
+		CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);
 	return 0;
 
 out_icache:
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index ca702c6..13513b8 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -10,6 +10,7 @@
 #include <linux/fs.h>
 #include <linux/mempool.h>
 #include <linux/pagemap.h>
+#include <linux/slab.h>
 #include <linux/wait.h>
 #include <linux/writeback.h>
 #include <linux/slab.h>
@@ -715,8 +716,7 @@ extern int ceph_update_snap_trace(struct ceph_mds_client *m,
 extern void ceph_handle_snap(struct ceph_mds_client *mdsc,
 			     struct ceph_mds_session *session,
 			     struct ceph_msg *msg);
-extern void ceph_queue_cap_snap(struct ceph_inode_info *ci,
-				struct ceph_snap_context *snapc);
+extern void ceph_queue_cap_snap(struct ceph_inode_info *ci);
 extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
 				  struct ceph_cap_snap *capsnap);
 extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 4797787..246a167 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -18,6 +18,8 @@
 #ifndef _CIFS_FS_SB_H
 #define _CIFS_FS_SB_H
 
+#include <linux/backing-dev.h>
+
 #define CIFS_MOUNT_NO_PERM      1 /* do not do client vfs_perm check */
 #define CIFS_MOUNT_SET_UID      2 /* set current's euid in create etc. */
 #define CIFS_MOUNT_SERVER_INUM  4 /* inode numbers from uniqueid from server  */
@@ -50,5 +52,6 @@ struct cifs_sb_info {
 #ifdef CONFIG_CIFS_DFS_UPCALL
 	char   *mountdata; /* mount options received at mount time */
 #endif
+	struct backing_dev_info bdi;
 };
 #endif				/* _CIFS_FS_SB_H */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5183bc2..ad235d6 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -103,6 +103,12 @@ cifs_read_super(struct super_block *sb, void *data,
 	if (cifs_sb == NULL)
 		return -ENOMEM;
 
+	rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY);
+	if (rc) {
+		kfree(cifs_sb);
+		return rc;
+	}
+
 #ifdef CONFIG_CIFS_DFS_UPCALL
 	/* copy mount params to sb for use in submounts */
 	/* BB: should we move this after the mount so we
@@ -115,6 +121,7 @@ cifs_read_super(struct super_block *sb, void *data,
 		int len = strlen(data);
 		cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL);
 		if (cifs_sb->mountdata == NULL) {
+			bdi_destroy(&cifs_sb->bdi);
 			kfree(sb->s_fs_info);
 			sb->s_fs_info = NULL;
 			return -ENOMEM;
@@ -135,6 +142,7 @@ cifs_read_super(struct super_block *sb, void *data,
 
 	sb->s_magic = CIFS_MAGIC_NUMBER;
 	sb->s_op = &cifs_super_ops;
+	sb->s_bdi = &cifs_sb->bdi;
 /*	if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512)
 	    sb->s_blocksize =
 		cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */
@@ -183,6 +191,7 @@ out_mount_failed:
 		}
 #endif
 		unload_nls(cifs_sb->local_nls);
+		bdi_destroy(&cifs_sb->bdi);
 		kfree(cifs_sb);
 	}
 	return rc;
@@ -214,6 +223,7 @@ cifs_put_super(struct super_block *sb)
 #endif
 
 	unload_nls(cifs_sb->local_nls);
+	bdi_destroy(&cifs_sb->bdi);
 	kfree(cifs_sb);
 
 	unlock_kernel();
@@ -808,6 +818,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
 	.release = cifs_close,
 	.fsync = cifs_fsync,
 	.flush = cifs_flush,
+	.mmap = cifs_file_mmap,
 	.splice_read = generic_file_splice_read,
 #ifdef CONFIG_CIFS_POSIX
 	.unlocked_ioctl  = cifs_ioctl,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 3f4fbd6..5d3f29f 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1431,6 +1431,8 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
 	__u32 bytes_sent;
 	__u16 byte_count;
 
+	*nbytes = 0;
+
 	/* cFYI(1, ("write at %lld %d bytes", offset, count));*/
 	if (tcon->ses == NULL)
 		return -ECONNABORTED;
@@ -1513,11 +1515,18 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
 	cifs_stats_inc(&tcon->num_writes);
 	if (rc) {
 		cFYI(1, ("Send error in write = %d", rc));
-		*nbytes = 0;
 	} else {
 		*nbytes = le16_to_cpu(pSMBr->CountHigh);
 		*nbytes = (*nbytes) << 16;
 		*nbytes += le16_to_cpu(pSMBr->Count);
+
+		/*
+		 * Mask off high 16 bits when bytes written as returned by the
+		 * server is greater than bytes requested by the client. Some
+		 * OS/2 servers are known to set incorrect CountHigh values.
+		 */
+		if (*nbytes > count)
+			*nbytes &= 0xFFFF;
 	}
 
 	cifs_buf_release(pSMB);
@@ -1606,6 +1615,14 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
 		*nbytes = le16_to_cpu(pSMBr->CountHigh);
 		*nbytes = (*nbytes) << 16;
 		*nbytes += le16_to_cpu(pSMBr->Count);
+
+		/*
+		 * Mask off high 16 bits when bytes written as returned by the
+		 * server is greater than bytes requested by the client. OS/2
+		 * servers are known to set incorrect CountHigh values.
+		 */
+		if (*nbytes > count)
+			*nbytes &= 0xFFFF;
 	}
 
 /*	cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
@@ -1794,8 +1811,21 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
 		}
 		parm_data = (struct cifs_posix_lock *)
 			((char *)&pSMBr->hdr.Protocol + data_offset);
-		if (parm_data->lock_type == cpu_to_le16(CIFS_UNLCK))
+		if (parm_data->lock_type == __constant_cpu_to_le16(CIFS_UNLCK))
 			pLockData->fl_type = F_UNLCK;
+		else {
+			if (parm_data->lock_type ==
+					__constant_cpu_to_le16(CIFS_RDLCK))
+				pLockData->fl_type = F_RDLCK;
+			else if (parm_data->lock_type ==
+					__constant_cpu_to_le16(CIFS_WRLCK))
+				pLockData->fl_type = F_WRLCK;
+
+			pLockData->fl_start = parm_data->start;
+			pLockData->fl_end = parm_data->start +
+						parm_data->length - 1;
+			pLockData->fl_pid = parm_data->pid;
+		}
 	}
 
 plk_err_exit:
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 058b390..9b11a8f 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -839,8 +839,32 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
 
 		} else {
 			/* if rc == ERR_SHARING_VIOLATION ? */
-			rc = 0;	/* do not change lock type to unlock
-				   since range in use */
+			rc = 0;
+
+			if (lockType & LOCKING_ANDX_SHARED_LOCK) {
+				pfLock->fl_type = F_WRLCK;
+			} else {
+				rc = CIFSSMBLock(xid, tcon, netfid, length,
+					pfLock->fl_start, 0, 1,
+					lockType | LOCKING_ANDX_SHARED_LOCK,
+					0 /* wait flag */);
+				if (rc == 0) {
+					rc = CIFSSMBLock(xid, tcon, netfid,
+						length, pfLock->fl_start, 1, 0,
+						lockType |
+						LOCKING_ANDX_SHARED_LOCK,
+						0 /* wait flag */);
+					pfLock->fl_type = F_RDLCK;
+					if (rc != 0)
+						cERROR(1, ("Error unlocking "
+						"previously locked range %d "
+						"during test of lock", rc));
+					rc = 0;
+				} else {
+					pfLock->fl_type = F_WRLCK;
+					rc = 0;
+				}
+			}
 		}
 
 		FreeXid(xid);
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index a1695dc..d97f993 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -167,6 +167,10 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
 		return -EBUSY;
 	}
 
+	error = bdi_setup_and_register(&vc->bdi, "coda", BDI_CAP_MAP_COPY);
+	if (error)
+		goto bdi_err;
+
 	vc->vc_sb = sb;
 
 	sb->s_fs_info = vc;
@@ -175,6 +179,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_blocksize_bits = 12;
 	sb->s_magic = CODA_SUPER_MAGIC;
 	sb->s_op = &coda_super_operations;
+	sb->s_bdi = &vc->bdi;
 
 	/* get root fid from Venus: this needs the root inode */
 	error = venus_rootfid(sb, &fid);
@@ -200,6 +205,8 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
         return 0;
 
  error:
+	bdi_destroy(&vc->bdi);
+ bdi_err:
 	if (root)
 		iput(root);
 	if (vc)
@@ -210,6 +217,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
 
 static void coda_put_super(struct super_block *sb)
 {
+	bdi_destroy(&coda_vcp(sb)->bdi);
 	coda_vcp(sb)->vc_sb = NULL;
 	sb->s_fs_info = NULL;
 
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c32a1b6..641640d 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -102,7 +102,6 @@
 #include <linux/nbd.h>
 #include <linux/random.h>
 #include <linux/filter.h>
-#include <linux/pktcdvd.h>
 
 #include <linux/hiddev.h>
 
@@ -1126,8 +1125,6 @@ COMPATIBLE_IOCTL(PPGETMODE)
 COMPATIBLE_IOCTL(PPGETPHASE)
 COMPATIBLE_IOCTL(PPGETFLAGS)
 COMPATIBLE_IOCTL(PPSETFLAGS)
-/* pktcdvd */
-COMPATIBLE_IOCTL(PACKET_CTRL_CMD)
 /* Big A */
 /* sparc only */
 /* Big Q for sound/OSS */
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index efb2b94..1cc0876 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -382,8 +382,8 @@ out:
 static void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num,
 					     struct ecryptfs_crypt_stat *crypt_stat)
 {
-	(*offset) = (crypt_stat->num_header_bytes_at_front
-		     + (crypt_stat->extent_size * extent_num));
+	(*offset) = ecryptfs_lower_header_size(crypt_stat)
+		    + (crypt_stat->extent_size * extent_num);
 }
 
 /**
@@ -835,13 +835,13 @@ void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat)
 	set_extent_mask_and_shift(crypt_stat);
 	crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES;
 	if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
-		crypt_stat->num_header_bytes_at_front = 0;
+		crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
 	else {
 		if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)
-			crypt_stat->num_header_bytes_at_front =
+			crypt_stat->metadata_size =
 				ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
 		else
-			crypt_stat->num_header_bytes_at_front =	PAGE_CACHE_SIZE;
+			crypt_stat->metadata_size = PAGE_CACHE_SIZE;
 	}
 }
 
@@ -1108,9 +1108,9 @@ static void write_ecryptfs_marker(char *page_virt, size_t *written)
 	(*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
 }
 
-static void
-write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat,
-		     size_t *written)
+void ecryptfs_write_crypt_stat_flags(char *page_virt,
+				     struct ecryptfs_crypt_stat *crypt_stat,
+				     size_t *written)
 {
 	u32 flags = 0;
 	int i;
@@ -1238,8 +1238,7 @@ ecryptfs_write_header_metadata(char *virt,
 
 	header_extent_size = (u32)crypt_stat->extent_size;
 	num_header_extents_at_front =
-		(u16)(crypt_stat->num_header_bytes_at_front
-		      / crypt_stat->extent_size);
+		(u16)(crypt_stat->metadata_size / crypt_stat->extent_size);
 	put_unaligned_be32(header_extent_size, virt);
 	virt += 4;
 	put_unaligned_be16(num_header_extents_at_front, virt);
@@ -1292,7 +1291,8 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t max,
 	offset = ECRYPTFS_FILE_SIZE_BYTES;
 	write_ecryptfs_marker((page_virt + offset), &written);
 	offset += written;
-	write_ecryptfs_flags((page_virt + offset), crypt_stat, &written);
+	ecryptfs_write_crypt_stat_flags((page_virt + offset), crypt_stat,
+					&written);
 	offset += written;
 	ecryptfs_write_header_metadata((page_virt + offset), crypt_stat,
 				       &written);
@@ -1382,7 +1382,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
 		rc = -EINVAL;
 		goto out;
 	}
-	virt_len = crypt_stat->num_header_bytes_at_front;
+	virt_len = crypt_stat->metadata_size;
 	order = get_order(virt_len);
 	/* Released in this function */
 	virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL, order);
@@ -1428,16 +1428,15 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
 	header_extent_size = get_unaligned_be32(virt);
 	virt += sizeof(__be32);
 	num_header_extents_at_front = get_unaligned_be16(virt);
-	crypt_stat->num_header_bytes_at_front =
-		(((size_t)num_header_extents_at_front
-		  * (size_t)header_extent_size));
+	crypt_stat->metadata_size = (((size_t)num_header_extents_at_front
+				     * (size_t)header_extent_size));
 	(*bytes_read) = (sizeof(__be32) + sizeof(__be16));
 	if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE)
-	    && (crypt_stat->num_header_bytes_at_front
+	    && (crypt_stat->metadata_size
 		< ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) {
 		rc = -EINVAL;
 		printk(KERN_WARNING "Invalid header size: [%zd]\n",
-		       crypt_stat->num_header_bytes_at_front);
+		       crypt_stat->metadata_size);
 	}
 	return rc;
 }
@@ -1452,8 +1451,7 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
  */
 static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat)
 {
-	crypt_stat->num_header_bytes_at_front =
-		ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
+	crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
 }
 
 /**
@@ -1607,6 +1605,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
 						ecryptfs_dentry,
 						ECRYPTFS_VALIDATE_HEADER_SIZE);
 	if (rc) {
+		memset(page_virt, 0, PAGE_CACHE_SIZE);
 		rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode);
 		if (rc) {
 			printk(KERN_DEBUG "Valid eCryptfs headers not found in "
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 542f625..bfc2e0f 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -35,6 +35,7 @@
 #include <linux/scatterlist.h>
 #include <linux/hash.h>
 #include <linux/nsproxy.h>
+#include <linux/backing-dev.h>
 
 /* Version verification for shared data structures w/ userspace */
 #define ECRYPTFS_VERSION_MAJOR 0x00
@@ -273,7 +274,7 @@ struct ecryptfs_crypt_stat {
 	u32 flags;
 	unsigned int file_version;
 	size_t iv_bytes;
-	size_t num_header_bytes_at_front;
+	size_t metadata_size;
 	size_t extent_size; /* Data extent size; default is 4096 */
 	size_t key_size;
 	size_t extent_shift;
@@ -393,6 +394,7 @@ struct ecryptfs_mount_crypt_stat {
 struct ecryptfs_sb_info {
 	struct super_block *wsi_sb;
 	struct ecryptfs_mount_crypt_stat mount_crypt_stat;
+	struct backing_dev_info bdi;
 };
 
 /* file private data. */
@@ -464,6 +466,14 @@ struct ecryptfs_daemon {
 
 extern struct mutex ecryptfs_daemon_hash_mux;
 
+static inline size_t
+ecryptfs_lower_header_size(struct ecryptfs_crypt_stat *crypt_stat)
+{
+	if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
+		return 0;
+	return crypt_stat->metadata_size;
+}
+
 static inline struct ecryptfs_file_info *
 ecryptfs_file_to_private(struct file *file)
 {
@@ -651,6 +661,9 @@ int ecryptfs_decrypt_page(struct page *page);
 int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry);
 int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry);
 int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry);
+void ecryptfs_write_crypt_stat_flags(char *page_virt,
+				     struct ecryptfs_crypt_stat *crypt_stat,
+				     size_t *written);
 int ecryptfs_read_and_validate_header_region(char *data,
 					     struct inode *ecryptfs_inode);
 int ecryptfs_read_and_validate_xattr_region(char *page_virt,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index d3362fa..e2d4418 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -324,6 +324,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
 	rc = ecryptfs_read_and_validate_header_region(page_virt,
 						      ecryptfs_dentry->d_inode);
 	if (rc) {
+		memset(page_virt, 0, PAGE_CACHE_SIZE);
 		rc = ecryptfs_read_and_validate_xattr_region(page_virt,
 							     ecryptfs_dentry);
 		if (rc) {
@@ -336,7 +337,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
 		ecryptfs_dentry->d_sb)->mount_crypt_stat;
 	if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) {
 		if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
-			file_size = (crypt_stat->num_header_bytes_at_front
+			file_size = (crypt_stat->metadata_size
 				     + i_size_read(lower_dentry->d_inode));
 		else
 			file_size = i_size_read(lower_dentry->d_inode);
@@ -388,9 +389,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
 	mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
 	if (IS_ERR(lower_dentry)) {
 		rc = PTR_ERR(lower_dentry);
-		printk(KERN_ERR "%s: lookup_one_len() returned [%d] on "
-		       "lower_dentry = [%s]\n", __func__, rc,
-		       ecryptfs_dentry->d_name.name);
+		ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
+				"[%d] on lower_dentry = [%s]\n", __func__, rc,
+				encrypted_and_encoded_name);
 		goto out_d_drop;
 	}
 	if (lower_dentry->d_inode)
@@ -417,9 +418,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
 	mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
 	if (IS_ERR(lower_dentry)) {
 		rc = PTR_ERR(lower_dentry);
-		printk(KERN_ERR "%s: lookup_one_len() returned [%d] on "
-		       "lower_dentry = [%s]\n", __func__, rc,
-		       encrypted_and_encoded_name);
+		ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
+				"[%d] on lower_dentry = [%s]\n", __func__, rc,
+				encrypted_and_encoded_name);
 		goto out_d_drop;
 	}
 lookup_and_interpose:
@@ -456,8 +457,8 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
 	rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0);
 	if (rc)
 		goto out_lock;
-	fsstack_copy_attr_times(dir, lower_new_dentry->d_inode);
-	fsstack_copy_inode_size(dir, lower_new_dentry->d_inode);
+	fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
+	fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
 	old_dentry->d_inode->i_nlink =
 		ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink;
 	i_size_write(new_dentry->d_inode, file_size_save);
@@ -648,38 +649,17 @@ out_lock:
 	return rc;
 }
 
-static int
-ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
+static int ecryptfs_readlink_lower(struct dentry *dentry, char **buf,
+				   size_t *bufsiz)
 {
+	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
 	char *lower_buf;
-	size_t lower_bufsiz;
-	struct dentry *lower_dentry;
-	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
-	char *plaintext_name;
-	size_t plaintext_name_size;
+	size_t lower_bufsiz = PATH_MAX;
 	mm_segment_t old_fs;
 	int rc;
 
-	lower_dentry = ecryptfs_dentry_to_lower(dentry);
-	if (!lower_dentry->d_inode->i_op->readlink) {
-		rc = -EINVAL;
-		goto out;
-	}
-	mount_crypt_stat = &ecryptfs_superblock_to_private(
-						dentry->d_sb)->mount_crypt_stat;
-	/*
-	 * If the lower filename is encrypted, it will result in a significantly
-	 * longer name.  If needed, truncate the name after decode and decrypt.
-	 */
-	if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
-		lower_bufsiz = PATH_MAX;
-	else
-		lower_bufsiz = bufsiz;
-	/* Released in this function */
 	lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL);
-	if (lower_buf == NULL) {
-		printk(KERN_ERR "%s: Out of memory whilst attempting to "
-		       "kmalloc [%zd] bytes\n", __func__, lower_bufsiz);
+	if (!lower_buf) {
 		rc = -ENOMEM;
 		goto out;
 	}
@@ -689,29 +669,31 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
 						   (char __user *)lower_buf,
 						   lower_bufsiz);
 	set_fs(old_fs);
-	if (rc >= 0) {
-		rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name,
-							  &plaintext_name_size,
-							  dentry, lower_buf,
-							  rc);
-		if (rc) {
-			printk(KERN_ERR "%s: Error attempting to decode and "
-			       "decrypt filename; rc = [%d]\n", __func__,
-				rc);
-			goto out_free_lower_buf;
-		}
-		/* Check for bufsiz <= 0 done in sys_readlinkat() */
-		rc = copy_to_user(buf, plaintext_name,
-				  min((size_t) bufsiz, plaintext_name_size));
-		if (rc)
-			rc = -EFAULT;
-		else
-			rc = plaintext_name_size;
-		kfree(plaintext_name);
-		fsstack_copy_attr_atime(dentry->d_inode, lower_dentry->d_inode);
-	}
-out_free_lower_buf:
+	if (rc < 0)
+		goto out;
+	lower_bufsiz = rc;
+	rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry,
+						  lower_buf, lower_bufsiz);
+out:
 	kfree(lower_buf);
+	return rc;
+}
+
+static int
+ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
+{
+	char *kbuf;
+	size_t kbufsiz, copied;
+	int rc;
+
+	rc = ecryptfs_readlink_lower(dentry, &kbuf, &kbufsiz);
+	if (rc)
+		goto out;
+	copied = min_t(size_t, bufsiz, kbufsiz);
+	rc = copy_to_user(buf, kbuf, copied) ? -EFAULT : copied;
+	kfree(kbuf);
+	fsstack_copy_attr_atime(dentry->d_inode,
+				ecryptfs_dentry_to_lower(dentry)->d_inode);
 out:
 	return rc;
 }
@@ -769,7 +751,7 @@ upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat,
 {
 	loff_t lower_size;
 
-	lower_size = crypt_stat->num_header_bytes_at_front;
+	lower_size = ecryptfs_lower_header_size(crypt_stat);
 	if (upper_size != 0) {
 		loff_t num_extents;
 
@@ -1016,6 +998,28 @@ out:
 	return rc;
 }
 
+int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry,
+			  struct kstat *stat)
+{
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
+	int rc = 0;
+
+	mount_crypt_stat = &ecryptfs_superblock_to_private(
+						dentry->d_sb)->mount_crypt_stat;
+	generic_fillattr(dentry->d_inode, stat);
+	if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) {
+		char *target;
+		size_t targetsiz;
+
+		rc = ecryptfs_readlink_lower(dentry, &target, &targetsiz);
+		if (!rc) {
+			kfree(target);
+			stat->size = targetsiz;
+		}
+	}
+	return rc;
+}
+
 int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		     struct kstat *stat)
 {
@@ -1040,7 +1044,7 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
 	if (!lower_dentry->d_inode->i_op->setxattr) {
-		rc = -ENOSYS;
+		rc = -EOPNOTSUPP;
 		goto out;
 	}
 	mutex_lock(&lower_dentry->d_inode->i_mutex);
@@ -1058,7 +1062,7 @@ ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name,
 	int rc = 0;
 
 	if (!lower_dentry->d_inode->i_op->getxattr) {
-		rc = -ENOSYS;
+		rc = -EOPNOTSUPP;
 		goto out;
 	}
 	mutex_lock(&lower_dentry->d_inode->i_mutex);
@@ -1085,7 +1089,7 @@ ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size)
 
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
 	if (!lower_dentry->d_inode->i_op->listxattr) {
-		rc = -ENOSYS;
+		rc = -EOPNOTSUPP;
 		goto out;
 	}
 	mutex_lock(&lower_dentry->d_inode->i_mutex);
@@ -1102,7 +1106,7 @@ static int ecryptfs_removexattr(struct dentry *dentry, const char *name)
 
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
 	if (!lower_dentry->d_inode->i_op->removexattr) {
-		rc = -ENOSYS;
+		rc = -EOPNOTSUPP;
 		goto out;
 	}
 	mutex_lock(&lower_dentry->d_inode->i_mutex);
@@ -1133,6 +1137,7 @@ const struct inode_operations ecryptfs_symlink_iops = {
 	.put_link = ecryptfs_put_link,
 	.permission = ecryptfs_permission,
 	.setattr = ecryptfs_setattr,
+	.getattr = ecryptfs_getattr_link,
 	.setxattr = ecryptfs_setxattr,
 	.getxattr = ecryptfs_getxattr,
 	.listxattr = ecryptfs_listxattr,
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index af1a8f0..760983d 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -497,17 +497,25 @@ struct kmem_cache *ecryptfs_sb_info_cache;
 static int
 ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
 {
+	struct ecryptfs_sb_info *esi;
 	int rc = 0;
 
 	/* Released in ecryptfs_put_super() */
 	ecryptfs_set_superblock_private(sb,
 					kmem_cache_zalloc(ecryptfs_sb_info_cache,
 							 GFP_KERNEL));
-	if (!ecryptfs_superblock_to_private(sb)) {
+	esi = ecryptfs_superblock_to_private(sb);
+	if (!esi) {
 		ecryptfs_printk(KERN_WARNING, "Out of memory\n");
 		rc = -ENOMEM;
 		goto out;
 	}
+
+	rc = bdi_setup_and_register(&esi->bdi, "ecryptfs", BDI_CAP_MAP_COPY);
+	if (rc)
+		goto out;
+
+	sb->s_bdi = &esi->bdi;
 	sb->s_op = &ecryptfs_sops;
 	/* Released through deactivate_super(sb) from get_sb_nodev */
 	sb->s_root = d_alloc(NULL, &(const struct qstr) {
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index d491237..2ee9a3a 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -83,6 +83,19 @@ out:
 	return rc;
 }
 
+static void strip_xattr_flag(char *page_virt,
+			     struct ecryptfs_crypt_stat *crypt_stat)
+{
+	if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) {
+		size_t written;
+
+		crypt_stat->flags &= ~ECRYPTFS_METADATA_IN_XATTR;
+		ecryptfs_write_crypt_stat_flags(page_virt, crypt_stat,
+						&written);
+		crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
+	}
+}
+
 /**
  *   Header Extent:
  *     Octets 0-7:        Unencrypted file size (big-endian)
@@ -98,19 +111,6 @@ out:
  *                        (big-endian)
  *     Octet  26:         Begin RFC 2440 authentication token packet set
  */
-static void set_header_info(char *page_virt,
-			    struct ecryptfs_crypt_stat *crypt_stat)
-{
-	size_t written;
-	size_t save_num_header_bytes_at_front =
-		crypt_stat->num_header_bytes_at_front;
-
-	crypt_stat->num_header_bytes_at_front =
-		ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
-	ecryptfs_write_header_metadata(page_virt + 20, crypt_stat, &written);
-	crypt_stat->num_header_bytes_at_front =
-		save_num_header_bytes_at_front;
-}
 
 /**
  * ecryptfs_copy_up_encrypted_with_header
@@ -136,8 +136,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
 					   * num_extents_per_page)
 					  + extent_num_in_page);
 		size_t num_header_extents_at_front =
-			(crypt_stat->num_header_bytes_at_front
-			 / crypt_stat->extent_size);
+			(crypt_stat->metadata_size / crypt_stat->extent_size);
 
 		if (view_extent_num < num_header_extents_at_front) {
 			/* This is a header extent */
@@ -147,9 +146,14 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
 			memset(page_virt, 0, PAGE_CACHE_SIZE);
 			/* TODO: Support more than one header extent */
 			if (view_extent_num == 0) {
+				size_t written;
+
 				rc = ecryptfs_read_xattr_region(
 					page_virt, page->mapping->host);
-				set_header_info(page_virt, crypt_stat);
+				strip_xattr_flag(page_virt + 16, crypt_stat);
+				ecryptfs_write_header_metadata(page_virt + 20,
+							       crypt_stat,
+							       &written);
 			}
 			kunmap_atomic(page_virt, KM_USER0);
 			flush_dcache_page(page);
@@ -162,7 +166,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
 			/* This is an encrypted data extent */
 			loff_t lower_offset =
 				((view_extent_num * crypt_stat->extent_size)
-				 - crypt_stat->num_header_bytes_at_front);
+				 - crypt_stat->metadata_size);
 
 			rc = ecryptfs_read_lower_page_segment(
 				page, (lower_offset >> PAGE_CACHE_SHIFT),
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index fcef41c..0c0ae49 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -86,7 +86,6 @@ static void ecryptfs_destroy_inode(struct inode *inode)
 		if (lower_dentry->d_inode) {
 			fput(inode_info->lower_file);
 			inode_info->lower_file = NULL;
-			d_drop(lower_dentry);
 		}
 	}
 	ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
@@ -123,6 +122,7 @@ static void ecryptfs_put_super(struct super_block *sb)
 	lock_kernel();
 
 	ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat);
+	bdi_destroy(&sb_info->bdi);
 	kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
 	ecryptfs_set_superblock_private(sb, NULL);
 
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 8442e35..22721b2 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -35,6 +35,7 @@
 
 #include <linux/fs.h>
 #include <linux/time.h>
+#include <linux/backing-dev.h>
 #include "common.h"
 
 /* FIXME: Remove once pnfs hits mainline
@@ -84,6 +85,7 @@ struct exofs_sb_info {
 	u32		s_next_generation;	/* next gen # to use          */
 	atomic_t	s_curr_pending;		/* number of pending commands */
 	uint8_t		s_cred[OSD_CAP_LEN];	/* credential for the fscb    */
+	struct 		backing_dev_info bdi;	/* register our bdi with VFS  */
 
 	struct pnfs_osd_data_map data_map;	/* Default raid to use
 						 * FIXME: Needed ?
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 18e57ea..03149b9 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -302,6 +302,7 @@ static void exofs_put_super(struct super_block *sb)
 	_exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0],
 			    sbi->layout.s_pid);
 
+	bdi_destroy(&sbi->bdi);
 	exofs_free_sbi(sbi);
 	sb->s_fs_info = NULL;
 }
@@ -546,6 +547,10 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
 	if (!sbi)
 		return -ENOMEM;
 
+	ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
+	if (ret)
+		goto free_bdi;
+
 	/* use mount options to fill superblock */
 	od = osduld_path_lookup(opts->dev_name);
 	if (IS_ERR(od)) {
@@ -612,6 +617,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	/* set up operation vectors */
+	sb->s_bdi = &sbi->bdi;
 	sb->s_fs_info = sbi;
 	sb->s_op = &exofs_sops;
 	sb->s_export_op = &exofs_export_ops;
@@ -643,6 +649,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
 	return 0;
 
 free_sbi:
+	bdi_destroy(&sbi->bdi);
+free_bdi:
 	EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
 		  opts->dev_name, sbi->layout.s_pid, ret);
 	exofs_free_sbi(sbi);
diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c
index 4e2426e..565cf81 100644
--- a/fs/ext2/symlink.c
+++ b/fs/ext2/symlink.c
@@ -32,6 +32,7 @@ const struct inode_operations ext2_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= page_follow_link_light,
 	.put_link	= page_put_link,
+	.setattr	= ext2_setattr,
 #ifdef CONFIG_EXT2_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
@@ -43,6 +44,7 @@ const struct inode_operations ext2_symlink_inode_operations = {
 const struct inode_operations ext2_fast_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= ext2_follow_link,
+	.setattr	= ext2_setattr,
 #ifdef CONFIG_EXT2_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
diff --git a/fs/ext3/symlink.c b/fs/ext3/symlink.c
index ff7b4cc..7c48982 100644
--- a/fs/ext3/symlink.c
+++ b/fs/ext3/symlink.c
@@ -34,6 +34,7 @@ const struct inode_operations ext3_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= page_follow_link_light,
 	.put_link	= page_put_link,
+	.setattr	= ext3_setattr,
 #ifdef CONFIG_EXT3_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
@@ -45,6 +46,7 @@ const struct inode_operations ext3_symlink_inode_operations = {
 const struct inode_operations ext3_fast_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= ext3_follow_link,
+	.setattr	= ext3_setattr,
 #ifdef CONFIG_EXT3_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 94c8ee8..236b834 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3879,6 +3879,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
 		physical += offset;
 		length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
 		flags |= FIEMAP_EXTENT_DATA_INLINE;
+		brelse(iloc.bh);
 	} else { /* external block */
 		physical = EXT4_I(inode)->i_file_acl << blockbits;
 		length = inode->i_sb->s_blocksize;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5381802..81d6054 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5375,7 +5375,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
 	} else {
 		struct ext4_iloc iloc;
 
-		err = ext4_get_inode_loc(inode, &iloc);
+		err = __ext4_get_inode_loc(inode, &iloc, 0);
 		if (err)
 			return err;
 		if (wbc->sync_mode == WB_SYNC_ALL)
@@ -5386,6 +5386,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
 				   (unsigned long long)iloc.bh->b_blocknr);
 			err = -EIO;
 		}
+		brelse(iloc.bh);
 	}
 	return err;
 }
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index bde9d0b..b423a36 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2535,6 +2535,17 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
 		mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
 			 entry->count, entry->group, entry);
 
+		if (test_opt(sb, DISCARD)) {
+			ext4_fsblk_t discard_block;
+
+			discard_block = entry->start_blk +
+				ext4_group_first_block_no(sb, entry->group);
+			trace_ext4_discard_blocks(sb,
+					(unsigned long long)discard_block,
+					entry->count);
+			sb_issue_discard(sb, discard_block, entry->count);
+		}
+
 		err = ext4_mb_load_buddy(sb, entry->group, &e4b);
 		/* we expect to find existing buddy because it's pinned */
 		BUG_ON(err != 0);
@@ -2556,16 +2567,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
 			page_cache_release(e4b.bd_bitmap_page);
 		}
 		ext4_unlock_group(sb, entry->group);
-		if (test_opt(sb, DISCARD)) {
-			ext4_fsblk_t discard_block;
-
-			discard_block = entry->start_blk +
-				ext4_group_first_block_no(sb, entry->group);
-			trace_ext4_discard_blocks(sb,
-					(unsigned long long)discard_block,
-					entry->count);
-			sb_issue_discard(sb, discard_block, entry->count);
-		}
 		kmem_cache_free(ext4_free_ext_cachep, entry);
 		ext4_mb_release_desc(&e4b);
 	}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 781a322..4b37f7c 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -554,108 +554,85 @@ select_queue:
 	return ret;
 }
 
-static void unpin_sb_for_writeback(struct super_block **psb)
+static void unpin_sb_for_writeback(struct super_block *sb)
 {
-	struct super_block *sb = *psb;
-
-	if (sb) {
-		up_read(&sb->s_umount);
-		put_super(sb);
-		*psb = NULL;
-	}
+	up_read(&sb->s_umount);
+	put_super(sb);
 }
 
+enum sb_pin_state {
+	SB_PINNED,
+	SB_NOT_PINNED,
+	SB_PIN_FAILED
+};
+
 /*
  * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
  * before calling writeback. So make sure that we do pin it, so it doesn't
  * go away while we are writing inodes from it.
- *
- * Returns 0 if the super was successfully pinned (or pinning wasn't needed),
- * 1 if we failed.
  */
-static int pin_sb_for_writeback(struct writeback_control *wbc,
-				struct inode *inode, struct super_block **psb)
+static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
+					      struct super_block *sb)
 {
-	struct super_block *sb = inode->i_sb;
-
-	/*
-	 * If this sb is already pinned, nothing more to do. If not and
-	 * *psb is non-NULL, unpin the old one first
-	 */
-	if (sb == *psb)
-		return 0;
-	else if (*psb)
-		unpin_sb_for_writeback(psb);
-
 	/*
 	 * Caller must already hold the ref for this
 	 */
 	if (wbc->sync_mode == WB_SYNC_ALL) {
 		WARN_ON(!rwsem_is_locked(&sb->s_umount));
-		return 0;
+		return SB_NOT_PINNED;
 	}
-
 	spin_lock(&sb_lock);
 	sb->s_count++;
 	if (down_read_trylock(&sb->s_umount)) {
 		if (sb->s_root) {
 			spin_unlock(&sb_lock);
-			goto pinned;
+			return SB_PINNED;
 		}
 		/*
 		 * umounted, drop rwsem again and fall through to failure
 		 */
 		up_read(&sb->s_umount);
 	}
-
 	sb->s_count--;
 	spin_unlock(&sb_lock);
-	return 1;
-pinned:
-	*psb = sb;
-	return 0;
+	return SB_PIN_FAILED;
 }
 
-static void writeback_inodes_wb(struct bdi_writeback *wb,
-				struct writeback_control *wbc)
+/*
+ * Write a portion of b_io inodes which belong to @sb.
+ * If @wbc->sb != NULL, then find and write all such
+ * inodes. Otherwise write only ones which go sequentially
+ * in reverse order.
+ * Return 1, if the caller writeback routine should be
+ * interrupted. Otherwise return 0.
+ */
+static int writeback_sb_inodes(struct super_block *sb,
+			       struct bdi_writeback *wb,
+			       struct writeback_control *wbc)
 {
-	struct super_block *sb = wbc->sb, *pin_sb = NULL;
-	const unsigned long start = jiffies;	/* livelock avoidance */
-
-	spin_lock(&inode_lock);
-
-	if (!wbc->for_kupdate || list_empty(&wb->b_io))
-		queue_io(wb, wbc->older_than_this);
-
 	while (!list_empty(&wb->b_io)) {
-		struct inode *inode = list_entry(wb->b_io.prev,
-						struct inode, i_list);
 		long pages_skipped;
-
-		/*
-		 * super block given and doesn't match, skip this inode
-		 */
-		if (sb && sb != inode->i_sb) {
+		struct inode *inode = list_entry(wb->b_io.prev,
+						 struct inode, i_list);
+		if (wbc->sb && sb != inode->i_sb) {
+			/* super block given and doesn't
+			   match, skip this inode */
 			redirty_tail(inode);
 			continue;
 		}
-
+		if (sb != inode->i_sb)
+			/* finish with this superblock */
+			return 0;
 		if (inode->i_state & (I_NEW | I_WILL_FREE)) {
 			requeue_io(inode);
 			continue;
 		}
-
 		/*
 		 * Was this inode dirtied after sync_sb_inodes was called?
 		 * This keeps sync from extra jobs and livelock.
 		 */
-		if (inode_dirtied_after(inode, start))
-			break;
-
-		if (pin_sb_for_writeback(wbc, inode, &pin_sb)) {
-			requeue_io(inode);
-			continue;
-		}
+		if (inode_dirtied_after(inode, wbc->wb_start))
+			return 1;
 
 		BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
 		__iget(inode);
@@ -674,14 +651,50 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
 		spin_lock(&inode_lock);
 		if (wbc->nr_to_write <= 0) {
 			wbc->more_io = 1;
-			break;
+			return 1;
 		}
 		if (!list_empty(&wb->b_more_io))
 			wbc->more_io = 1;
 	}
+	/* b_io is empty */
+	return 1;
+}
+
+static void writeback_inodes_wb(struct bdi_writeback *wb,
+				struct writeback_control *wbc)
+{
+	int ret = 0;
 
-	unpin_sb_for_writeback(&pin_sb);
+	wbc->wb_start = jiffies; /* livelock avoidance */
+	spin_lock(&inode_lock);
+	if (!wbc->for_kupdate || list_empty(&wb->b_io))
+		queue_io(wb, wbc->older_than_this);
+
+	while (!list_empty(&wb->b_io)) {
+		struct inode *inode = list_entry(wb->b_io.prev,
+						 struct inode, i_list);
+		struct super_block *sb = inode->i_sb;
+		enum sb_pin_state state;
+
+		if (wbc->sb && sb != wbc->sb) {
+			/* super block given and doesn't
+			   match, skip this inode */
+			redirty_tail(inode);
+			continue;
+		}
+		state = pin_sb_for_writeback(wbc, sb);
+
+		if (state == SB_PIN_FAILED) {
+			requeue_io(inode);
+			continue;
+		}
+		ret = writeback_sb_inodes(sb, wb, wbc);
 
+		if (state == SB_PINNED)
+			unpin_sb_for_writeback(sb);
+		if (ret)
+			break;
+	}
 	spin_unlock(&inode_lock);
 	/* Leave any unwritten inodes on b_io */
 }
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
index 46435f3a..4765190 100644
--- a/fs/fscache/stats.c
+++ b/fs/fscache/stats.c
@@ -165,8 +165,8 @@ static int fscache_stats_show(struct seq_file *m, void *v)
 		   atomic_read(&fscache_n_object_lookups),
 		   atomic_read(&fscache_n_object_lookups_negative),
 		   atomic_read(&fscache_n_object_lookups_positive),
-		   atomic_read(&fscache_n_object_lookups_timed_out),
-		   atomic_read(&fscache_n_object_created));
+		   atomic_read(&fscache_n_object_created),
+		   atomic_read(&fscache_n_object_lookups_timed_out));
 
 	seq_printf(m, "Updates: n=%u nul=%u run=%u\n",
 		   atomic_read(&fscache_n_updates),
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 6c75110..7faefb4 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -228,14 +228,23 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
 
 #ifdef CONFIG_BLOCK
 
-#define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits)
-#define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits);
+static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
+{
+	return (offset >> inode->i_blkbits);
+}
+
+static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
+{
+	return (blk << inode->i_blkbits);
+}
 
 /**
  * __generic_block_fiemap - FIEMAP for block based inodes (no locking)
- * @inode - the inode to map
- * @arg - the pointer to userspace where we copy everything to
- * @get_block - the fs's get_block function
+ * @inode: the inode to map
+ * @fieinfo: the fiemap info struct that will be passed back to userspace
+ * @start: where to start mapping in the inode
+ * @len: how much space to map
+ * @get_block: the fs's get_block function
  *
  * This does FIEMAP for block based inodes.  Basically it will just loop
  * through get_block until we hit the number of extents we want to map, or we
@@ -250,58 +259,63 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
  */
 
 int __generic_block_fiemap(struct inode *inode,
-			   struct fiemap_extent_info *fieinfo, u64 start,
-			   u64 len, get_block_t *get_block)
+			   struct fiemap_extent_info *fieinfo, loff_t start,
+			   loff_t len, get_block_t *get_block)
 {
-	struct buffer_head tmp;
-	unsigned long long start_blk;
-	long long length = 0, map_len = 0;
+	struct buffer_head map_bh;
+	sector_t start_blk, last_blk;
+	loff_t isize = i_size_read(inode);
 	u64 logical = 0, phys = 0, size = 0;
 	u32 flags = FIEMAP_EXTENT_MERGED;
-	int ret = 0, past_eof = 0, whole_file = 0;
+	bool past_eof = false, whole_file = false;
+	int ret = 0;
 
-	if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC)))
+	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+	if (ret)
 		return ret;
 
-	start_blk = logical_to_blk(inode, start);
-
-	length = (long long)min_t(u64, len, i_size_read(inode));
-	if (length < len)
-		whole_file = 1;
+	/*
+	 * Either the i_mutex or other appropriate locking needs to be held
+	 * since we expect isize to not change at all through the duration of
+	 * this call.
+	 */
+	if (len >= isize) {
+		whole_file = true;
+		len = isize;
+	}
 
-	map_len = length;
+	start_blk = logical_to_blk(inode, start);
+	last_blk = logical_to_blk(inode, start + len - 1);
 
 	do {
 		/*
 		 * we set b_size to the total size we want so it will map as
 		 * many contiguous blocks as possible at once
 		 */
-		memset(&tmp, 0, sizeof(struct buffer_head));
-		tmp.b_size = map_len;
+		memset(&map_bh, 0, sizeof(struct buffer_head));
+		map_bh.b_size = len;
 
-		ret = get_block(inode, start_blk, &tmp, 0);
+		ret = get_block(inode, start_blk, &map_bh, 0);
 		if (ret)
 			break;
 
 		/* HOLE */
-		if (!buffer_mapped(&tmp)) {
-			length -= blk_to_logical(inode, 1);
+		if (!buffer_mapped(&map_bh)) {
 			start_blk++;
 
 			/*
-			 * we want to handle the case where there is an
+			 * We want to handle the case where there is an
 			 * allocated block at the front of the file, and then
 			 * nothing but holes up to the end of the file properly,
 			 * to make sure that extent at the front gets properly
 			 * marked with FIEMAP_EXTENT_LAST
 			 */
 			if (!past_eof &&
-			    blk_to_logical(inode, start_blk) >=
-			    blk_to_logical(inode, 0)+i_size_read(inode))
+			    blk_to_logical(inode, start_blk) >= isize)
 				past_eof = 1;
 
 			/*
-			 * first hole after going past the EOF, this is our
+			 * First hole after going past the EOF, this is our
 			 * last extent
 			 */
 			if (past_eof && size) {
@@ -309,15 +323,18 @@ int __generic_block_fiemap(struct inode *inode,
 				ret = fiemap_fill_next_extent(fieinfo, logical,
 							      phys, size,
 							      flags);
-				break;
+			} else if (size) {
+				ret = fiemap_fill_next_extent(fieinfo, logical,
+							      phys, size, flags);
+				size = 0;
 			}
 
 			/* if we have holes up to/past EOF then we're done */
-			if (length <= 0 || past_eof)
+			if (start_blk > last_blk || past_eof || ret)
 				break;
 		} else {
 			/*
-			 * we have gone over the length of what we wanted to
+			 * We have gone over the length of what we wanted to
 			 * map, and it wasn't the entire file, so add the extent
 			 * we got last time and exit.
 			 *
@@ -331,7 +348,7 @@ int __generic_block_fiemap(struct inode *inode,
 			 * are good to go, just add the extent to the fieinfo
 			 * and break
 			 */
-			if (length <= 0 && !whole_file) {
+			if (start_blk > last_blk && !whole_file) {
 				ret = fiemap_fill_next_extent(fieinfo, logical,
 							      phys, size,
 							      flags);
@@ -351,11 +368,10 @@ int __generic_block_fiemap(struct inode *inode,
 			}
 
 			logical = blk_to_logical(inode, start_blk);
-			phys = blk_to_logical(inode, tmp.b_blocknr);
-			size = tmp.b_size;
+			phys = blk_to_logical(inode, map_bh.b_blocknr);
+			size = map_bh.b_size;
 			flags = FIEMAP_EXTENT_MERGED;
 
-			length -= tmp.b_size;
 			start_blk += logical_to_blk(inode, size);
 
 			/*
@@ -363,15 +379,13 @@ int __generic_block_fiemap(struct inode *inode,
 			 * soon as we find a hole that the last extent we found
 			 * is marked with FIEMAP_EXTENT_LAST
 			 */
-			if (!past_eof &&
-			    logical+size >=
-			    blk_to_logical(inode, 0)+i_size_read(inode))
-				past_eof = 1;
+			if (!past_eof && logical + size >= isize)
+				past_eof = true;
 		}
 		cond_resched();
 	} while (1);
 
-	/* if ret is 1 then we just hit the end of the extent array */
+	/* If ret is 1 then we just hit the end of the extent array */
 	if (ret == 1)
 		ret = 0;
 
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 9dd1262..ed9ba6f 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -61,7 +61,7 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino)
 			inode->i_op = &page_symlink_inode_operations;
 			inode->i_mapping->a_ops = &jfs_aops;
 		} else {
-			inode->i_op = &jfs_symlink_inode_operations;
+			inode->i_op = &jfs_fast_symlink_inode_operations;
 			/*
 			 * The inline data should be null-terminated, but
 			 * don't let on-disk corruption crash the kernel
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 6c4dfcbf..9e2f6a7 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -196,7 +196,7 @@ int dbMount(struct inode *ipbmap)
 	bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag);
 	bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref);
 	bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel);
-	bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth);
+	bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight);
 	bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth);
 	bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart);
 	bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size);
@@ -288,7 +288,7 @@ int dbSync(struct inode *ipbmap)
 	dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag);
 	dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref);
 	dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel);
-	dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth);
+	dbmp_le->dn_agheight = cpu_to_le32(bmp->db_agheight);
 	dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth);
 	dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart);
 	dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size);
@@ -1441,7 +1441,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
 	 * tree index of this allocation group within the control page.
 	 */
 	agperlev =
-	    (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth;
+	    (1 << (L2LPERCTL - (bmp->db_agheight << 1))) / bmp->db_agwidth;
 	ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1));
 
 	/* dmap control page trees fan-out by 4 and a single allocation
@@ -1460,7 +1460,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
 		 * the subtree to find the leftmost leaf that describes this
 		 * free space.
 		 */
-		for (k = bmp->db_agheigth; k > 0; k--) {
+		for (k = bmp->db_agheight; k > 0; k--) {
 			for (n = 0, m = (ti << 2) + 1; n < 4; n++) {
 				if (l2nb <= dcp->stree[m + n]) {
 					ti = m + n;
@@ -3607,7 +3607,7 @@ void dbFinalizeBmap(struct inode *ipbmap)
 	}
 
 	/*
-	 * compute db_aglevel, db_agheigth, db_width, db_agstart:
+	 * compute db_aglevel, db_agheight, db_width, db_agstart:
 	 * an ag is covered in aglevel dmapctl summary tree,
 	 * at agheight level height (from leaf) with agwidth number of nodes
 	 * each, which starts at agstart index node of the smmary tree node
@@ -3616,9 +3616,9 @@ void dbFinalizeBmap(struct inode *ipbmap)
 	bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize);
 	l2nl =
 	    bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL);
-	bmp->db_agheigth = l2nl >> 1;
-	bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheigth << 1));
-	for (i = 5 - bmp->db_agheigth, bmp->db_agstart = 0, n = 1; i > 0;
+	bmp->db_agheight = l2nl >> 1;
+	bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheight << 1));
+	for (i = 5 - bmp->db_agheight, bmp->db_agstart = 0, n = 1; i > 0;
 	     i--) {
 		bmp->db_agstart += n;
 		n <<= 2;
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 1a6eb41..6dcb906 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -210,7 +210,7 @@ struct dbmap_disk {
 	__le32 dn_maxag;	/* 4: max active alloc group number	*/
 	__le32 dn_agpref;	/* 4: preferred alloc group (hint)	*/
 	__le32 dn_aglevel;	/* 4: dmapctl level holding the AG	*/
-	__le32 dn_agheigth;	/* 4: height in dmapctl of the AG	*/
+	__le32 dn_agheight;	/* 4: height in dmapctl of the AG	*/
 	__le32 dn_agwidth;	/* 4: width in dmapctl of the AG	*/
 	__le32 dn_agstart;	/* 4: start tree index at AG height	*/
 	__le32 dn_agl2size;	/* 4: l2 num of blks per alloc group	*/
@@ -229,7 +229,7 @@ struct dbmap {
 	int dn_maxag;		/* max active alloc group number	*/
 	int dn_agpref;		/* preferred alloc group (hint)		*/
 	int dn_aglevel;		/* dmapctl level holding the AG		*/
-	int dn_agheigth;	/* height in dmapctl of the AG		*/
+	int dn_agheight;	/* height in dmapctl of the AG		*/
 	int dn_agwidth;		/* width in dmapctl of the AG		*/
 	int dn_agstart;		/* start tree index at AG height	*/
 	int dn_agl2size;	/* l2 num of blks per alloc group	*/
@@ -255,7 +255,7 @@ struct bmap {
 #define	db_agsize	db_bmap.dn_agsize
 #define	db_agl2size	db_bmap.dn_agl2size
 #define	db_agwidth	db_bmap.dn_agwidth
-#define	db_agheigth	db_bmap.dn_agheigth
+#define	db_agheight	db_bmap.dn_agheight
 #define	db_agstart	db_bmap.dn_agstart
 #define	db_numag	db_bmap.dn_numag
 #define	db_maxlevel	db_bmap.dn_maxlevel
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 79e2c79..9e6bda3 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -48,5 +48,6 @@ extern const struct file_operations jfs_dir_operations;
 extern const struct inode_operations jfs_file_inode_operations;
 extern const struct file_operations jfs_file_operations;
 extern const struct inode_operations jfs_symlink_inode_operations;
+extern const struct inode_operations jfs_fast_symlink_inode_operations;
 extern const struct dentry_operations jfs_ci_dentry_operations;
 #endif				/* _H_JFS_INODE */
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 4a3e9f3..a9cf8e8 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -956,7 +956,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 	 */
 
 	if (ssize <= IDATASIZE) {
-		ip->i_op = &jfs_symlink_inode_operations;
+		ip->i_op = &jfs_fast_symlink_inode_operations;
 
 		i_fastsymlink = JFS_IP(ip)->i_inline;
 		memcpy(i_fastsymlink, name, ssize);
@@ -978,7 +978,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 	else {
 		jfs_info("jfs_symlink: allocate extent ip:0x%p", ip);
 
-		ip->i_op = &page_symlink_inode_operations;
+		ip->i_op = &jfs_symlink_inode_operations;
 		ip->i_mapping->a_ops = &jfs_aops;
 
 		/*
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 7f24a0b..1aba003 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -81,6 +81,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 	struct inode *iplist[1];
 	struct jfs_superblock *j_sb, *j_sb2;
 	uint old_agsize;
+	int agsizechanged = 0;
 	struct buffer_head *bh, *bh2;
 
 	/* If the volume hasn't grown, get out now */
@@ -333,6 +334,9 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 	 */
 	if ((rc = dbExtendFS(ipbmap, XAddress, nblocks)))
 		goto error_out;
+
+	agsizechanged |= (bmp->db_agsize != old_agsize);
+
 	/*
 	 * the map now has extended to cover additional nblocks:
 	 * dn_mapsize = oldMapsize + nblocks;
@@ -432,7 +436,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 	 * will correctly identify the new ag);
 	 */
 	/* if new AG size the same as old AG size, done! */
-	if (bmp->db_agsize != old_agsize) {
+	if (agsizechanged) {
 		if ((rc = diExtendFS(ipimap, ipbmap)))
 			goto error_out;
 
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c
index 4af1a05..205b946 100644
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -29,9 +29,21 @@ static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 	return NULL;
 }
 
-const struct inode_operations jfs_symlink_inode_operations = {
+const struct inode_operations jfs_fast_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= jfs_follow_link,
+	.setattr	= jfs_setattr,
+	.setxattr	= jfs_setxattr,
+	.getxattr	= jfs_getxattr,
+	.listxattr	= jfs_listxattr,
+	.removexattr	= jfs_removexattr,
+};
+
+const struct inode_operations jfs_symlink_inode_operations = {
+	.readlink	= generic_readlink,
+	.follow_link	= page_follow_link_light,
+	.put_link	= page_put_link,
+	.setattr	= jfs_setattr,
 	.setxattr	= jfs_setxattr,
 	.getxattr	= jfs_getxattr,
 	.listxattr	= jfs_listxattr,
diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c
index 84e36f5..76c242f 100644
--- a/fs/logfs/gc.c
+++ b/fs/logfs/gc.c
@@ -459,6 +459,14 @@ static void __logfs_gc_pass(struct super_block *sb, int target)
 	struct logfs_block *block;
 	int round, progress, last_progress = 0;
 
+	/*
+	 * Doing too many changes to the segfile at once would result
+	 * in a large number of aliases.  Write the journal before
+	 * things get out of hand.
+	 */
+	if (super->s_shadow_tree.no_shadowed_segments >= MAX_OBJ_ALIASES)
+		logfs_write_anchor(sb);
+
 	if (no_free_segments(sb) >= target &&
 			super->s_no_object_aliases < MAX_OBJ_ALIASES)
 		return;
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 33bd260..fb0a613 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -389,7 +389,10 @@ static void journal_get_erase_count(struct logfs_area *area)
 static int journal_erase_segment(struct logfs_area *area)
 {
 	struct super_block *sb = area->a_sb;
-	struct logfs_segment_header sh;
+	union {
+		struct logfs_segment_header sh;
+		unsigned char c[ALIGN(sizeof(struct logfs_segment_header), 16)];
+	} u;
 	u64 ofs;
 	int err;
 
@@ -397,20 +400,21 @@ static int journal_erase_segment(struct logfs_area *area)
 	if (err)
 		return err;
 
-	sh.pad = 0;
-	sh.type = SEG_JOURNAL;
-	sh.level = 0;
-	sh.segno = cpu_to_be32(area->a_segno);
-	sh.ec = cpu_to_be32(area->a_erase_count);
-	sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
-	sh.crc = logfs_crc32(&sh, sizeof(sh), 4);
+	memset(&u, 0, sizeof(u));
+	u.sh.pad = 0;
+	u.sh.type = SEG_JOURNAL;
+	u.sh.level = 0;
+	u.sh.segno = cpu_to_be32(area->a_segno);
+	u.sh.ec = cpu_to_be32(area->a_erase_count);
+	u.sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
+	u.sh.crc = logfs_crc32(&u.sh, sizeof(u.sh), 4);
 
 	/* This causes a bug in segment.c.  Not yet. */
 	//logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0);
 
 	ofs = dev_ofs(sb, area->a_segno, 0);
-	area->a_used_bytes = ALIGN(sizeof(sh), 16);
-	logfs_buf_write(area, ofs, &sh, sizeof(sh));
+	area->a_used_bytes = sizeof(u);
+	logfs_buf_write(area, ofs, &u, sizeof(u));
 	return 0;
 }
 
@@ -494,6 +498,8 @@ static void account_shadows(struct super_block *sb)
 
 	btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow);
 	btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow);
+	btree_grim_visitor32(&tree->segment_map, 0, NULL);
+	tree->no_shadowed_segments = 0;
 
 	if (li->li_block) {
 		/*
@@ -607,9 +613,9 @@ static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type,
 	if (len == 0)
 		return logfs_write_header(super, header, 0, type);
 
+	BUG_ON(len > sb->s_blocksize);
 	compr_len = logfs_compress(buf, data, len, sb->s_blocksize);
 	if (compr_len < 0 || type == JE_ANCHOR) {
-		BUG_ON(len > sb->s_blocksize);
 		memcpy(data, buf, len);
 		compr_len = len;
 		compr = COMPR_NONE;
@@ -661,6 +667,7 @@ static int logfs_write_je_buf(struct super_block *sb, void *buf, u16 type,
 	if (ofs < 0)
 		return ofs;
 	logfs_buf_write(area, ofs, super->s_compressed_je, len);
+	BUG_ON(super->s_no_je >= MAX_JOURNAL_ENTRIES);
 	super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs);
 	return 0;
 }
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index b84b0ee..0a3df1a 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -257,10 +257,14 @@ struct logfs_shadow {
  * struct shadow_tree
  * @new:			shadows where old_ofs==0, indexed by new_ofs
  * @old:			shadows where old_ofs!=0, indexed by old_ofs
+ * @segment_map:		bitfield of segments containing shadows
+ * @no_shadowed_segment:	number of segments containing shadows
  */
 struct shadow_tree {
 	struct btree_head64 new;
 	struct btree_head64 old;
+	struct btree_head32 segment_map;
+	int no_shadowed_segments;
 };
 
 struct object_alias_item {
@@ -305,13 +309,14 @@ typedef int write_alias_t(struct super_block *sb, u64 ino, u64 bix,
 		level_t level, int child_no, __be64 val);
 struct logfs_block_ops {
 	void	(*write_block)(struct logfs_block *block);
-	gc_level_t	(*block_level)(struct logfs_block *block);
 	void	(*free_block)(struct super_block *sb, struct logfs_block*block);
 	int	(*write_alias)(struct super_block *sb,
 			struct logfs_block *block,
 			write_alias_t *write_one_alias);
 };
 
+#define MAX_JOURNAL_ENTRIES 256
+
 struct logfs_super {
 	struct mtd_info *s_mtd;			/* underlying device */
 	struct block_device *s_bdev;		/* underlying device */
@@ -378,7 +383,7 @@ struct logfs_super {
 	u32	 s_journal_ec[LOGFS_JOURNAL_SEGS]; /* journal erasecounts */
 	u64	 s_last_version;
 	struct logfs_area *s_journal_area;	/* open journal segment */
-	__be64	s_je_array[64];
+	__be64	s_je_array[MAX_JOURNAL_ENTRIES];
 	int	s_no_je;
 
 	int	 s_sum_index;			/* for the 12 summaries */
@@ -722,4 +727,10 @@ static inline struct logfs_area *get_area(struct super_block *sb,
 	return logfs_super(sb)->s_area[(__force u8)gc_level];
 }
 
+static inline void logfs_mempool_destroy(mempool_t *pool)
+{
+	if (pool)
+		mempool_destroy(pool);
+}
+
 #endif
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index bff4025..3159db6 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -430,25 +430,6 @@ static void inode_write_block(struct logfs_block *block)
 	}
 }
 
-static gc_level_t inode_block_level(struct logfs_block *block)
-{
-	BUG_ON(block->inode->i_ino == LOGFS_INO_MASTER);
-	return GC_LEVEL(LOGFS_MAX_LEVELS);
-}
-
-static gc_level_t indirect_block_level(struct logfs_block *block)
-{
-	struct page *page;
-	struct inode *inode;
-	u64 bix;
-	level_t level;
-
-	page = block->page;
-	inode = page->mapping->host;
-	logfs_unpack_index(page->index, &bix, &level);
-	return expand_level(inode->i_ino, level);
-}
-
 /*
  * This silences a false, yet annoying gcc warning.  I hate it when my editor
  * jumps into bitops.h each time I recompile this file.
@@ -587,14 +568,12 @@ static void indirect_free_block(struct super_block *sb,
 
 static struct logfs_block_ops inode_block_ops = {
 	.write_block = inode_write_block,
-	.block_level = inode_block_level,
 	.free_block = inode_free_block,
 	.write_alias = inode_write_alias,
 };
 
 struct logfs_block_ops indirect_block_ops = {
 	.write_block = indirect_write_block,
-	.block_level = indirect_block_level,
 	.free_block = indirect_free_block,
 	.write_alias = indirect_write_alias,
 };
@@ -1241,6 +1220,18 @@ static void free_shadow(struct inode *inode, struct logfs_shadow *shadow)
 	mempool_free(shadow, super->s_shadow_pool);
 }
 
+static void mark_segment(struct shadow_tree *tree, u32 segno)
+{
+	int err;
+
+	if (!btree_lookup32(&tree->segment_map, segno)) {
+		err = btree_insert32(&tree->segment_map, segno, (void *)1,
+				GFP_NOFS);
+		BUG_ON(err);
+		tree->no_shadowed_segments++;
+	}
+}
+
 /**
  * fill_shadow_tree - Propagate shadow tree changes due to a write
  * @inode:	Inode owning the page
@@ -1288,6 +1279,8 @@ static void fill_shadow_tree(struct inode *inode, struct page *page,
 
 		super->s_dirty_used_bytes += shadow->new_len;
 		super->s_dirty_free_bytes += shadow->old_len;
+		mark_segment(tree, shadow->old_ofs >> super->s_segshift);
+		mark_segment(tree, shadow->new_ofs >> super->s_segshift);
 	}
 }
 
@@ -1845,19 +1838,37 @@ static int __logfs_truncate(struct inode *inode, u64 size)
 	return logfs_truncate_direct(inode, size);
 }
 
-int logfs_truncate(struct inode *inode, u64 size)
+/*
+ * Truncate, by changing the segment file, can consume a fair amount
+ * of resources.  So back off from time to time and do some GC.
+ * 8 or 2048 blocks should be well within safety limits even if
+ * every single block resided in a different segment.
+ */
+#define TRUNCATE_STEP	(8 * 1024 * 1024)
+int logfs_truncate(struct inode *inode, u64 target)
 {
 	struct super_block *sb = inode->i_sb;
-	int err;
+	u64 size = i_size_read(inode);
+	int err = 0;
 
-	logfs_get_wblocks(sb, NULL, 1);
-	err = __logfs_truncate(inode, size);
-	if (!err)
-		err = __logfs_write_inode(inode, 0);
-	logfs_put_wblocks(sb, NULL, 1);
+	size = ALIGN(size, TRUNCATE_STEP);
+	while (size > target) {
+		if (size > TRUNCATE_STEP)
+			size -= TRUNCATE_STEP;
+		else
+			size = 0;
+		if (size < target)
+			size = target;
+
+		logfs_get_wblocks(sb, NULL, 1);
+		err = __logfs_truncate(inode, target);
+		if (!err)
+			err = __logfs_write_inode(inode, 0);
+		logfs_put_wblocks(sb, NULL, 1);
+	}
 
 	if (!err)
-		err = vmtruncate(inode, size);
+		err = vmtruncate(inode, target);
 
 	/* I don't trust error recovery yet. */
 	WARN_ON(err);
@@ -2251,8 +2262,6 @@ void logfs_cleanup_rw(struct super_block *sb)
 	struct logfs_super *super = logfs_super(sb);
 
 	destroy_meta_inode(super->s_segfile_inode);
-	if (super->s_block_pool)
-		mempool_destroy(super->s_block_pool);
-	if (super->s_shadow_pool)
-		mempool_destroy(super->s_shadow_pool);
+	logfs_mempool_destroy(super->s_block_pool);
+	logfs_mempool_destroy(super->s_shadow_pool);
 }
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index 801a3a1..f77ce2b 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -183,14 +183,8 @@ static int btree_write_alias(struct super_block *sb, struct logfs_block *block,
 	return 0;
 }
 
-static gc_level_t btree_block_level(struct logfs_block *block)
-{
-	return expand_level(block->ino, block->level);
-}
-
 static struct logfs_block_ops btree_block_ops = {
 	.write_block	= btree_write_block,
-	.block_level	= btree_block_level,
 	.free_block	= __free_block,
 	.write_alias	= btree_write_alias,
 };
@@ -919,7 +913,7 @@ err:
 	for (i--; i >= 0; i--)
 		free_area(super->s_area[i]);
 	free_area(super->s_journal_area);
-	mempool_destroy(super->s_alias_pool);
+	logfs_mempool_destroy(super->s_alias_pool);
 	return -ENOMEM;
 }
 
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index b60bfac..5866ee6 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -12,6 +12,7 @@
 #include "logfs.h"
 #include <linux/bio.h>
 #include <linux/slab.h>
+#include <linux/blkdev.h>
 #include <linux/mtd/mtd.h>
 #include <linux/statfs.h>
 #include <linux/buffer_head.h>
@@ -137,6 +138,10 @@ static int logfs_sb_set(struct super_block *sb, void *_super)
 	sb->s_fs_info = super;
 	sb->s_mtd = super->s_mtd;
 	sb->s_bdev = super->s_bdev;
+	if (sb->s_bdev)
+		sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info;
+	if (sb->s_mtd)
+		sb->s_bdi = sb->s_mtd->backing_dev_info;
 	return 0;
 }
 
@@ -452,6 +457,8 @@ static int logfs_read_sb(struct super_block *sb, int read_only)
 
 	btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool);
 	btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool);
+	btree_init_mempool32(&super->s_shadow_tree.segment_map,
+			super->s_btree_pool);
 
 	ret = logfs_init_mapping(sb);
 	if (ret)
@@ -516,8 +523,8 @@ static void logfs_kill_sb(struct super_block *sb)
 	if (super->s_erase_page)
 		__free_page(super->s_erase_page);
 	super->s_devops->put_device(sb);
-	mempool_destroy(super->s_btree_pool);
-	mempool_destroy(super->s_alias_pool);
+	logfs_mempool_destroy(super->s_btree_pool);
+	logfs_mempool_destroy(super->s_alias_pool);
 	kfree(super);
 	log_super("LogFS: Finished unmounting\n");
 }
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index cf98da1..fa33851 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -526,10 +526,15 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 	sb->s_blocksize_bits = 10;
 	sb->s_magic = NCP_SUPER_MAGIC;
 	sb->s_op = &ncp_sops;
+	sb->s_bdi = &server->bdi;
 
 	server = NCP_SBP(sb);
 	memset(server, 0, sizeof(*server));
 
+	error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY);
+	if (error)
+		goto out_bdi;
+
 	server->ncp_filp = ncp_filp;
 	server->ncp_sock = sock;
 	
@@ -719,6 +724,8 @@ out_fput2:
 	if (server->info_filp)
 		fput(server->info_filp);
 out_fput:
+	bdi_destroy(&server->bdi);
+out_bdi:
 	/* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
 	 * 
 	 * The previously used put_filp(ncp_filp); was bogous, since
@@ -756,6 +763,7 @@ static void ncp_put_super(struct super_block *sb)
 	kill_pid(server->m.wdog_pid, SIGTERM, 1);
 	put_pid(server->m.wdog_pid);
 
+	bdi_destroy(&server->bdi);
 	kfree(server->priv.data);
 	kfree(server->auth.object_name);
 	vfree(server->rxbuf);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 2a3d352..acc9c49 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -966,6 +966,8 @@ out_error:
 static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source)
 {
 	target->flags = source->flags;
+	target->rsize = source->rsize;
+	target->wsize = source->wsize;
 	target->acregmin = source->acregmin;
 	target->acregmax = source->acregmax;
 	target->acdirmin = source->acdirmin;
@@ -1294,7 +1296,8 @@ static int nfs4_init_server(struct nfs_server *server,
 
 	/* Initialise the client representation from the mount data */
 	server->flags = data->flags;
-	server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR;
+	server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|
+		NFS_CAP_POSIX_LOCK;
 	server->options = data->options;
 
 	/* Get a client record */
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 1567124..ea61d26 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -24,6 +24,8 @@
 
 static void nfs_do_free_delegation(struct nfs_delegation *delegation)
 {
+	if (delegation->cred)
+		put_rpccred(delegation->cred);
 	kfree(delegation);
 }
 
@@ -36,13 +38,7 @@ static void nfs_free_delegation_callback(struct rcu_head *head)
 
 static void nfs_free_delegation(struct nfs_delegation *delegation)
 {
-	struct rpc_cred *cred;
-
-	cred = rcu_dereference(delegation->cred);
-	rcu_assign_pointer(delegation->cred, NULL);
 	call_rcu(&delegation->rcu, nfs_free_delegation_callback);
-	if (cred)
-		put_rpccred(cred);
 }
 
 void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
@@ -129,21 +125,35 @@ again:
  */
 void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
 {
-	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
-	struct rpc_cred *oldcred;
+	struct nfs_delegation *delegation;
+	struct rpc_cred *oldcred = NULL;
 
-	if (delegation == NULL)
-		return;
-	memcpy(delegation->stateid.data, res->delegation.data,
-			sizeof(delegation->stateid.data));
-	delegation->type = res->delegation_type;
-	delegation->maxsize = res->maxsize;
-	oldcred = delegation->cred;
-	delegation->cred = get_rpccred(cred);
-	clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
-	NFS_I(inode)->delegation_state = delegation->type;
-	smp_wmb();
-	put_rpccred(oldcred);
+	rcu_read_lock();
+	delegation = rcu_dereference(NFS_I(inode)->delegation);
+	if (delegation != NULL) {
+		spin_lock(&delegation->lock);
+		if (delegation->inode != NULL) {
+			memcpy(delegation->stateid.data, res->delegation.data,
+			       sizeof(delegation->stateid.data));
+			delegation->type = res->delegation_type;
+			delegation->maxsize = res->maxsize;
+			oldcred = delegation->cred;
+			delegation->cred = get_rpccred(cred);
+			clear_bit(NFS_DELEGATION_NEED_RECLAIM,
+				  &delegation->flags);
+			NFS_I(inode)->delegation_state = delegation->type;
+			spin_unlock(&delegation->lock);
+			put_rpccred(oldcred);
+			rcu_read_unlock();
+		} else {
+			/* We appear to have raced with a delegation return. */
+			spin_unlock(&delegation->lock);
+			rcu_read_unlock();
+			nfs_inode_set_delegation(inode, cred, res);
+		}
+	} else {
+		rcu_read_unlock();
+	}
 }
 
 static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
@@ -166,9 +176,13 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
 	return inode;
 }
 
-static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid)
+static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi,
+							   const nfs4_stateid *stateid,
+							   struct nfs_client *clp)
 {
-	struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
+	struct nfs_delegation *delegation =
+		rcu_dereference_protected(nfsi->delegation,
+					  lockdep_is_held(&clp->cl_lock));
 
 	if (delegation == NULL)
 		goto nomatch;
@@ -195,7 +209,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 {
 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_delegation *delegation;
+	struct nfs_delegation *delegation, *old_delegation;
 	struct nfs_delegation *freeme = NULL;
 	int status = 0;
 
@@ -213,10 +227,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 	spin_lock_init(&delegation->lock);
 
 	spin_lock(&clp->cl_lock);
-	if (rcu_dereference(nfsi->delegation) != NULL) {
-		if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
-					sizeof(delegation->stateid)) == 0 &&
-				delegation->type == nfsi->delegation->type) {
+	old_delegation = rcu_dereference_protected(nfsi->delegation,
+						   lockdep_is_held(&clp->cl_lock));
+	if (old_delegation != NULL) {
+		if (memcmp(&delegation->stateid, &old_delegation->stateid,
+					sizeof(old_delegation->stateid)) == 0 &&
+				delegation->type == old_delegation->type) {
 			goto out;
 		}
 		/*
@@ -226,12 +242,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 		dfprintk(FILE, "%s: server %s handed out "
 				"a duplicate delegation!\n",
 				__func__, clp->cl_hostname);
-		if (delegation->type <= nfsi->delegation->type) {
+		if (delegation->type <= old_delegation->type) {
 			freeme = delegation;
 			delegation = NULL;
 			goto out;
 		}
-		freeme = nfs_detach_delegation_locked(nfsi, NULL);
+		freeme = nfs_detach_delegation_locked(nfsi, NULL, clp);
 	}
 	list_add_rcu(&delegation->super_list, &clp->cl_delegations);
 	nfsi->delegation_state = delegation->type;
@@ -301,7 +317,7 @@ restart:
 		if (inode == NULL)
 			continue;
 		spin_lock(&clp->cl_lock);
-		delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
+		delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
 		spin_unlock(&clp->cl_lock);
 		rcu_read_unlock();
 		if (delegation != NULL) {
@@ -330,9 +346,9 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 
-	if (rcu_dereference(nfsi->delegation) != NULL) {
+	if (rcu_access_pointer(nfsi->delegation) != NULL) {
 		spin_lock(&clp->cl_lock);
-		delegation = nfs_detach_delegation_locked(nfsi, NULL);
+		delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
 		spin_unlock(&clp->cl_lock);
 		if (delegation != NULL)
 			nfs_do_return_delegation(inode, delegation, 0);
@@ -346,9 +362,9 @@ int nfs_inode_return_delegation(struct inode *inode)
 	struct nfs_delegation *delegation;
 	int err = 0;
 
-	if (rcu_dereference(nfsi->delegation) != NULL) {
+	if (rcu_access_pointer(nfsi->delegation) != NULL) {
 		spin_lock(&clp->cl_lock);
-		delegation = nfs_detach_delegation_locked(nfsi, NULL);
+		delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
 		spin_unlock(&clp->cl_lock);
 		if (delegation != NULL) {
 			nfs_msync_inode(inode);
@@ -526,7 +542,7 @@ restart:
 		if (inode == NULL)
 			continue;
 		spin_lock(&clp->cl_lock);
-		delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
+		delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
 		spin_unlock(&clp->cl_lock);
 		rcu_read_unlock();
 		if (delegation != NULL)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c6f2750..a7bb5c6 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -837,6 +837,8 @@ out_zap_parent:
 		/* If we have submounts, don't unhash ! */
 		if (have_submounts(dentry))
 			goto out_valid;
+		if (dentry->d_flags & DCACHE_DISCONNECTED)
+			goto out_valid;
 		shrink_dcache_parent(dentry);
 	}
 	d_drop(dentry);
@@ -1025,12 +1027,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 				res = NULL;
 				goto out;
 			/* This turned out not to be a regular file */
+			case -EISDIR:
 			case -ENOTDIR:
 				goto no_open;
 			case -ELOOP:
 				if (!(nd->intent.open.flags & O_NOFOLLOW))
 					goto no_open;
-			/* case -EISDIR: */
 			/* case -EINVAL: */
 			default:
 				goto out;
@@ -1050,7 +1052,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
 	struct inode *dir;
 	int openflags, ret = 0;
 
-	if (!is_atomic_open(nd))
+	if (!is_atomic_open(nd) || d_mountpoint(dentry))
 		goto no_open;
 	parent = dget_parent(dentry);
 	dir = parent->d_inode;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 737128f..50a56ed 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -623,10 +623,10 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
 	list_for_each_entry(pos, &nfsi->open_files, list) {
 		if (cred != NULL && pos->cred != cred)
 			continue;
-		if ((pos->mode & mode) == mode) {
-			ctx = get_nfs_open_context(pos);
-			break;
-		}
+		if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode)
+			continue;
+		ctx = get_nfs_open_context(pos);
+		break;
 	}
 	spin_unlock(&inode->i_lock);
 	return ctx;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d79a7b3..071fced 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1523,6 +1523,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 		nfs_post_op_update_inode(dir, o_res->dir_attr);
 	} else
 		nfs_refresh_inode(dir, o_res->dir_attr);
+	if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
+		server->caps &= ~NFS_CAP_POSIX_LOCK;
 	if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
 		status = _nfs4_proc_open_confirm(data);
 		if (status != 0)
@@ -1664,7 +1666,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in
 	status = PTR_ERR(state);
 	if (IS_ERR(state))
 		goto err_opendata_put;
-	if ((opendata->o_res.rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) != 0)
+	if (server->caps & NFS_CAP_POSIX_LOCK)
 		set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
 	nfs4_opendata_put(opendata);
 	nfs4_put_state_owner(sp);
@@ -2068,8 +2070,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
 			case -EDQUOT:
 			case -ENOSPC:
 			case -EROFS:
-				lookup_instantiate_filp(nd, (struct dentry *)state, NULL);
-				return 1;
+				return PTR_ERR(state);
 			default:
 				goto out_drop;
 		}
@@ -5217,9 +5218,12 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
 	msg.rpc_resp = &calldata->res;
 	task_setup_data.callback_data = calldata;
 	task = rpc_run_task(&task_setup_data);
-	if (IS_ERR(task))
+	if (IS_ERR(task)) {
 		status = PTR_ERR(task);
+		goto out;
+	}
 	rpc_put_task(task);
+	return 0;
 out:
 	dprintk("<-- %s status=%d\n", __func__, status);
 	return status;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e016372..b4148fc 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2187,6 +2187,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 	if (data->version == 4) {
 		error = nfs4_try_mount(flags, dev_name, data, mnt);
 		kfree(data->client_address);
+		kfree(data->nfs_server.export_path);
 		goto out;
 	}
 #endif	/* CONFIG_NFS_V4 */
@@ -2657,7 +2658,7 @@ static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt)
 	devname = nfs_path(path->mnt->mnt_devname,
 			path->mnt->mnt_root, path->dentry,
 			page, PAGE_SIZE);
-	if (devname == NULL)
+	if (IS_ERR(devname))
 		goto out_freepage;
 	tmp = kstrdup(devname, GFP_KERNEL);
 	if (tmp == NULL)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 53ff70e..3aea3ca 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -201,6 +201,7 @@ static int nfs_set_page_writeback(struct page *page)
 		struct inode *inode = page->mapping->host;
 		struct nfs_server *nfss = NFS_SERVER(inode);
 
+		page_cache_get(page);
 		if (atomic_long_inc_return(&nfss->writeback) >
 				NFS_CONGESTION_ON_THRESH) {
 			set_bdi_congested(&nfss->backing_dev_info,
@@ -216,6 +217,7 @@ static void nfs_end_page_writeback(struct page *page)
 	struct nfs_server *nfss = NFS_SERVER(inode);
 
 	end_page_writeback(page);
+	page_cache_release(page);
 	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
 		clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
 }
@@ -421,6 +423,7 @@ static void
 nfs_mark_request_dirty(struct nfs_page *req)
 {
 	__set_page_dirty_nobuffers(req->wb_page);
+	__mark_inode_dirty(req->wb_page->mapping->host, I_DIRTY_DATASYNC);
 }
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -660,9 +663,11 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
 	req = nfs_setup_write_request(ctx, page, offset, count);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
+	nfs_mark_request_dirty(req);
 	/* Update file length */
 	nfs_grow_file(page, offset, count);
 	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
+	nfs_mark_request_dirty(req);
 	nfs_clear_page_tag_locked(req);
 	return 0;
 }
@@ -739,8 +744,6 @@ int nfs_updatepage(struct file *file, struct page *page,
 	status = nfs_writepage_setup(ctx, page, offset, count);
 	if (status < 0)
 		nfs_set_pageerror(page);
-	else
-		__set_page_dirty_nobuffers(page);
 
 	dprintk("NFS:       nfs_updatepage returns %d (isize %lld)\n",
 			status, (long long)i_size_read(inode));
@@ -749,13 +752,12 @@ int nfs_updatepage(struct file *file, struct page *page,
 
 static void nfs_writepage_release(struct nfs_page *req)
 {
+	struct page *page = req->wb_page;
 
-	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) {
-		nfs_end_page_writeback(req->wb_page);
+	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req))
 		nfs_inode_remove_request(req);
-	} else
-		nfs_end_page_writeback(req->wb_page);
 	nfs_clear_page_tag_locked(req);
+	nfs_end_page_writeback(page);
 }
 
 static int flush_task_priority(int how)
@@ -779,7 +781,6 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
 		int how)
 {
 	struct inode *inode = req->wb_context->path.dentry->d_inode;
-	int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 	int priority = flush_task_priority(how);
 	struct rpc_task *task;
 	struct rpc_message msg = {
@@ -794,9 +795,10 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
 		.callback_ops = call_ops,
 		.callback_data = data,
 		.workqueue = nfsiod_workqueue,
-		.flags = flags,
+		.flags = RPC_TASK_ASYNC,
 		.priority = priority,
 	};
+	int ret = 0;
 
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
@@ -835,10 +837,18 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
 		(unsigned long long)data->args.offset);
 
 	task = rpc_run_task(&task_setup_data);
-	if (IS_ERR(task))
-		return PTR_ERR(task);
+	if (IS_ERR(task)) {
+		ret = PTR_ERR(task);
+		goto out;
+	}
+	if (how & FLUSH_SYNC) {
+		ret = rpc_wait_for_completion_task(task);
+		if (ret == 0)
+			ret = task->tk_status;
+	}
 	rpc_put_task(task);
-	return 0;
+out:
+	return ret;
 }
 
 /* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -847,9 +857,11 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
  */
 static void nfs_redirty_request(struct nfs_page *req)
 {
+	struct page *page = req->wb_page;
+
 	nfs_mark_request_dirty(req);
-	nfs_end_page_writeback(req->wb_page);
 	nfs_clear_page_tag_locked(req);
+	nfs_end_page_writeback(page);
 }
 
 /*
@@ -1084,16 +1096,15 @@ static void nfs_writeback_release_full(void *calldata)
 		if (nfs_write_need_commit(data)) {
 			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
 			nfs_mark_request_commit(req);
-			nfs_end_page_writeback(page);
 			dprintk(" marked for commit\n");
 			goto next;
 		}
 		dprintk(" OK\n");
 remove_request:
-		nfs_end_page_writeback(page);
 		nfs_inode_remove_request(req);
 	next:
 		nfs_clear_page_tag_locked(req);
+		nfs_end_page_writeback(page);
 	}
 	nfs_writedata_release(calldata);
 }
@@ -1190,6 +1201,25 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
+{
+	if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
+		return 1;
+	if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags,
+				NFS_INO_COMMIT, nfs_wait_bit_killable,
+				TASK_KILLABLE))
+		return 1;
+	return 0;
+}
+
+static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
+{
+	clear_bit(NFS_INO_COMMIT, &nfsi->flags);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
+}
+
+
 static void nfs_commitdata_release(void *data)
 {
 	struct nfs_write_data *wdata = data;
@@ -1207,7 +1237,6 @@ static int nfs_commit_rpcsetup(struct list_head *head,
 {
 	struct nfs_page *first = nfs_list_entry(head->next);
 	struct inode *inode = first->wb_context->path.dentry->d_inode;
-	int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 	int priority = flush_task_priority(how);
 	struct rpc_task *task;
 	struct rpc_message msg = {
@@ -1222,7 +1251,7 @@ static int nfs_commit_rpcsetup(struct list_head *head,
 		.callback_ops = &nfs_commit_ops,
 		.callback_data = data,
 		.workqueue = nfsiod_workqueue,
-		.flags = flags,
+		.flags = RPC_TASK_ASYNC,
 		.priority = priority,
 	};
 
@@ -1282,6 +1311,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 				BDI_RECLAIMABLE);
 		nfs_clear_page_tag_locked(req);
 	}
+	nfs_commit_clear_lock(NFS_I(inode));
 	return -ENOMEM;
 }
 
@@ -1337,6 +1367,7 @@ static void nfs_commit_release(void *calldata)
 	next:
 		nfs_clear_page_tag_locked(req);
 	}
+	nfs_commit_clear_lock(NFS_I(data->inode));
 	nfs_commitdata_release(calldata);
 }
 
@@ -1351,8 +1382,11 @@ static const struct rpc_call_ops nfs_commit_ops = {
 static int nfs_commit_inode(struct inode *inode, int how)
 {
 	LIST_HEAD(head);
-	int res;
+	int may_wait = how & FLUSH_SYNC;
+	int res = 0;
 
+	if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
+		goto out;
 	spin_lock(&inode->i_lock);
 	res = nfs_scan_commit(inode, &head, 0, 0);
 	spin_unlock(&inode->i_lock);
@@ -1360,7 +1394,13 @@ static int nfs_commit_inode(struct inode *inode, int how)
 		int error = nfs_commit_list(inode, &head, how);
 		if (error < 0)
 			return error;
-	}
+		if (may_wait)
+			wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
+					nfs_wait_bit_killable,
+					TASK_KILLABLE);
+	} else
+		nfs_commit_clear_lock(NFS_I(inode));
+out:
 	return res;
 }
 
@@ -1432,6 +1472,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 
 	BUG_ON(!PageLocked(page));
 	for (;;) {
+		wait_on_page_writeback(page);
 		req = nfs_page_find_request(page);
 		if (req == NULL)
 			break;
@@ -1466,30 +1507,18 @@ int nfs_wb_page(struct inode *inode, struct page *page)
 		.range_start = range_start,
 		.range_end = range_end,
 	};
-	struct nfs_page *req;
-	int need_commit;
 	int ret;
 
 	while(PagePrivate(page)) {
+		wait_on_page_writeback(page);
 		if (clear_page_dirty_for_io(page)) {
 			ret = nfs_writepage_locked(page, &wbc);
 			if (ret < 0)
 				goto out_error;
 		}
-		req = nfs_find_and_lock_request(page);
-		if (!req)
-			break;
-		if (IS_ERR(req)) {
-			ret = PTR_ERR(req);
+		ret = sync_inode(inode, &wbc);
+		if (ret < 0)
 			goto out_error;
-		}
-		need_commit = test_bit(PG_CLEAN, &req->wb_flags);
-		nfs_clear_page_tag_locked(req);
-		if (need_commit) {
-			ret = nfs_commit_inode(inode, FLUSH_SYNC);
-			if (ret < 0)
-				goto out_error;
-		}
 	}
 	return 0;
 out_error:
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e170317..34ccf81 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -161,10 +161,10 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
 	argp->p = page_address(argp->pagelist[0]);
 	argp->pagelist++;
 	if (argp->pagelen < PAGE_SIZE) {
-		argp->end = p + (argp->pagelen>>2);
+		argp->end = argp->p + (argp->pagelen>>2);
 		argp->pagelen = 0;
 	} else {
-		argp->end = p + (PAGE_SIZE>>2);
+		argp->end = argp->p + (PAGE_SIZE>>2);
 		argp->pagelen -= PAGE_SIZE;
 	}
 	memcpy(((char*)p)+avail, argp->p, (nbytes - avail));
@@ -1426,10 +1426,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 			argp->p = page_address(argp->pagelist[0]);
 			argp->pagelist++;
 			if (argp->pagelen < PAGE_SIZE) {
-				argp->end = p + (argp->pagelen>>2);
+				argp->end = argp->p + (argp->pagelen>>2);
 				argp->pagelen = 0;
 			} else {
-				argp->end = p + (PAGE_SIZE>>2);
+				argp->end = argp->p + (PAGE_SIZE>>2);
 				argp->pagelen -= PAGE_SIZE;
 			}
 		}
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 8d6356a..7cfb87e 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -426,7 +426,7 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
 	bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
 	if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group),
 				    group_offset, bitmap))
-		printk(KERN_WARNING "%s: entry numer %llu already freed\n",
+		printk(KERN_WARNING "%s: entry number %llu already freed\n",
 		       __func__, (unsigned long long)req->pr_entry_nr);
 
 	nilfs_palloc_group_desc_add_entries(inode, group, desc, 1);
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 7cdd98b..76c38e3 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -1879,7 +1879,7 @@ static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
 				   struct nilfs_btree_path *path,
 				   int level, struct buffer_head *bh)
 {
-	int maxlevel, ret;
+	int maxlevel = 0, ret;
 	struct nilfs_btree_node *parent;
 	struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
 	__u64 ptr;
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index c2ff1b3..f90a33d 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -649,7 +649,7 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
 long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct inode *inode = filp->f_dentry->d_inode;
-	void __user *argp = (void * __user *)arg;
+	void __user *argp = (void __user *)arg;
 
 	switch (cmd) {
 	case NILFS_IOCTL_CHANGE_CPMODE:
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 0cdbc5e..48145f5 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -749,6 +749,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
 	sb->s_export_op = &nilfs_export_ops;
 	sb->s_root = NULL;
 	sb->s_time_gran = 1;
+	sb->s_bdi = nilfs->ns_bdi;
 
 	err = load_nilfs(nilfs, sbi);
 	if (err)
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index 3e56dbf..b3a159b 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -15,6 +15,7 @@ config INOTIFY
 
 config INOTIFY_USER
 	bool "Inotify support for userspace"
+	select ANON_INODES
 	select FSNOTIFY
 	default y
 	---help---
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index ecebb22..f9d5d3f 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -406,6 +406,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
 				struct buffer_head *bh)
 {
 	int ret = 0;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
 
 	mlog_entry_void();
 
@@ -425,6 +426,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
 
 	get_bh(bh); /* for end_buffer_write_sync() */
 	bh->b_end_io = end_buffer_write_sync;
+	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check);
 	submit_bh(WRITE, bh);
 
 	wait_on_buffer(bh);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index a795eb9..12d5eb7 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -184,9 +184,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 	BUG_ON(!lksb);
 
 	/* only updates if this node masters the lockres */
+	spin_lock(&res->spinlock);
 	if (res->owner == dlm->node_num) {
-
-		spin_lock(&res->spinlock);
 		/* check the lksb flags for the direction */
 		if (lksb->flags & DLM_LKSB_GET_LVB) {
 			mlog(0, "getting lvb from lockres for %s node\n",
@@ -201,8 +200,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
  		 * here. In the future we might want to clear it at the time
  		 * the put is actually done.
 		 */
-		spin_unlock(&res->spinlock);
 	}
+	spin_unlock(&res->spinlock);
 
 	/* reset any lvb flags on the lksb */
 	lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 1b0de15..b83d610 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -112,20 +112,20 @@ MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES);
  * O_RDONLY -> PRMODE level
  * O_WRONLY -> EXMODE level
  *
- * O_NONBLOCK -> LKM_NOQUEUE
+ * O_NONBLOCK -> NOQUEUE
  */
 static int dlmfs_decode_open_flags(int open_flags,
 				   int *level,
 				   int *flags)
 {
 	if (open_flags & (O_WRONLY|O_RDWR))
-		*level = LKM_EXMODE;
+		*level = DLM_LOCK_EX;
 	else
-		*level = LKM_PRMODE;
+		*level = DLM_LOCK_PR;
 
 	*flags = 0;
 	if (open_flags & O_NONBLOCK)
-		*flags |= LKM_NOQUEUE;
+		*flags |= DLM_LKF_NOQUEUE;
 
 	return 0;
 }
@@ -166,7 +166,7 @@ static int dlmfs_file_open(struct inode *inode,
 		 * to be able userspace to be able to distinguish a
 		 * valid lock request from one that simply couldn't be
 		 * granted. */
-		if (flags & LKM_NOQUEUE && status == -EAGAIN)
+		if (flags & DLM_LKF_NOQUEUE && status == -EAGAIN)
 			status = -ETXTBSY;
 		kfree(fp);
 		goto bail;
@@ -193,7 +193,7 @@ static int dlmfs_file_release(struct inode *inode,
 	status = 0;
 	if (fp) {
 		level = fp->fp_lock_level;
-		if (level != LKM_IVMODE)
+		if (level != DLM_LOCK_IV)
 			user_dlm_cluster_unlock(&ip->ip_lockres, level);
 
 		kfree(fp);
@@ -262,7 +262,7 @@ static ssize_t dlmfs_file_read(struct file *filp,
 	if ((count + *ppos) > i_size_read(inode))
 		readlen = i_size_read(inode) - *ppos;
 	else
-		readlen = count - *ppos;
+		readlen = count;
 
 	lvb_buf = kmalloc(readlen, GFP_NOFS);
 	if (!lvb_buf)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 17947dc..a5fbd9c 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -684,6 +684,7 @@ restarted_transaction:
 		if (why == RESTART_META) {
 			mlog(0, "restarting function.\n");
 			restart_func = 1;
+			status = 0;
 		} else {
 			BUG_ON(why != RESTART_TRANS);
 
@@ -1981,18 +1982,18 @@ relock:
 	/* communicate with ocfs2_dio_end_io */
 	ocfs2_iocb_set_rw_locked(iocb, rw_level);
 
-	if (direct_io) {
-		ret = generic_segment_checks(iov, &nr_segs, &ocount,
-					     VERIFY_READ);
-		if (ret)
-			goto out_dio;
+	ret = generic_segment_checks(iov, &nr_segs, &ocount,
+				     VERIFY_READ);
+	if (ret)
+		goto out_dio;
 
-		count = ocount;
-		ret = generic_write_checks(file, ppos, &count,
-					   S_ISBLK(inode->i_mode));
-		if (ret)
-			goto out_dio;
+	count = ocount;
+	ret = generic_write_checks(file, ppos, &count,
+				   S_ISBLK(inode->i_mode));
+	if (ret)
+		goto out_dio;
 
+	if (direct_io) {
 		written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
 						    ppos, count, ocount);
 		if (written < 0) {
@@ -2007,7 +2008,10 @@ relock:
 			goto out_dio;
 		}
 	} else {
-		written = __generic_file_aio_write(iocb, iov, nr_segs, ppos);
+		current->backing_dev_info = file->f_mapping->backing_dev_info;
+		written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos,
+						      ppos, count, 0);
+		current->backing_dev_info = NULL;
 	}
 
 out_dio:
@@ -2021,9 +2025,9 @@ out_dio:
 		if (ret < 0)
 			written = ret;
 
-		if (!ret && (old_size != i_size_read(inode) ||
-		    old_clusters != OCFS2_I(inode)->ip_clusters ||
-		    has_refcount)) {
+		if (!ret && ((old_size != i_size_read(inode)) ||
+			     (old_clusters != OCFS2_I(inode)->ip_clusters) ||
+			     has_refcount)) {
 			ret = jbd2_journal_force_commit(osb->journal->j_journal);
 			if (ret < 0)
 				written = ret;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 07cc8bb..af18988 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -558,6 +558,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
 		handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
 		if (IS_ERR(handle)) {
 			status = PTR_ERR(handle);
+			handle = NULL;
 			mlog_errno(status);
 			goto out;
 		}
@@ -639,11 +640,13 @@ static int ocfs2_remove_inode(struct inode *inode,
 		goto bail_unlock;
 	}
 
-	status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
-				  orphan_dir_bh);
-	if (status < 0) {
-		mlog_errno(status);
-		goto bail_commit;
+	if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
+		status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
+					  orphan_dir_bh);
+		if (status < 0) {
+			mlog_errno(status);
+			goto bail_commit;
+		}
 	}
 
 	/* set the inodes dtime */
@@ -722,38 +725,39 @@ static void ocfs2_signal_wipe_completion(struct ocfs2_super *osb,
 static int ocfs2_wipe_inode(struct inode *inode,
 			    struct buffer_head *di_bh)
 {
-	int status, orphaned_slot;
+	int status, orphaned_slot = -1;
 	struct inode *orphan_dir_inode = NULL;
 	struct buffer_head *orphan_dir_bh = NULL;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct ocfs2_dinode *di;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
 
-	di = (struct ocfs2_dinode *) di_bh->b_data;
-	orphaned_slot = le16_to_cpu(di->i_orphaned_slot);
+	if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
+		orphaned_slot = le16_to_cpu(di->i_orphaned_slot);
 
-	status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot);
-	if (status)
-		return status;
+		status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot);
+		if (status)
+			return status;
 
-	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
-						       ORPHAN_DIR_SYSTEM_INODE,
-						       orphaned_slot);
-	if (!orphan_dir_inode) {
-		status = -EEXIST;
-		mlog_errno(status);
-		goto bail;
-	}
+		orphan_dir_inode = ocfs2_get_system_file_inode(osb,
+							       ORPHAN_DIR_SYSTEM_INODE,
+							       orphaned_slot);
+		if (!orphan_dir_inode) {
+			status = -EEXIST;
+			mlog_errno(status);
+			goto bail;
+		}
 
-	/* Lock the orphan dir. The lock will be held for the entire
-	 * delete_inode operation. We do this now to avoid races with
-	 * recovery completion on other nodes. */
-	mutex_lock(&orphan_dir_inode->i_mutex);
-	status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
-	if (status < 0) {
-		mutex_unlock(&orphan_dir_inode->i_mutex);
+		/* Lock the orphan dir. The lock will be held for the entire
+		 * delete_inode operation. We do this now to avoid races with
+		 * recovery completion on other nodes. */
+		mutex_lock(&orphan_dir_inode->i_mutex);
+		status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
+		if (status < 0) {
+			mutex_unlock(&orphan_dir_inode->i_mutex);
 
-		mlog_errno(status);
-		goto bail;
+			mlog_errno(status);
+			goto bail;
+		}
 	}
 
 	/* we do this while holding the orphan dir lock because we
@@ -794,6 +798,9 @@ static int ocfs2_wipe_inode(struct inode *inode,
 		mlog_errno(status);
 
 bail_unlock_dir:
+	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)
+		return status;
+
 	ocfs2_inode_unlock(orphan_dir_inode, 1);
 	mutex_unlock(&orphan_dir_inode->i_mutex);
 	brelse(orphan_dir_bh);
@@ -889,7 +896,8 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
 
 	/* Do some basic inode verification... */
 	di = (struct ocfs2_dinode *) di_bh->b_data;
-	if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
+	if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL)) &&
+	    !(oi->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
 		/*
 		 * Inodes in the orphan dir must have ORPHANED_FL.  The only
 		 * inodes that come back out of the orphan dir are reflink
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index ba4fe07..0b28e19 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -100,6 +100,8 @@ struct ocfs2_inode_info
 #define OCFS2_INODE_MAYBE_ORPHANED	0x00000020
 /* Does someone have the file open O_DIRECT */
 #define OCFS2_INODE_OPEN_DIRECT		0x00000040
+/* Tell the inode wipe code it's not in orphan dir */
+#define OCFS2_INODE_SKIP_ORPHAN_DIR     0x00000080
 
 static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
 {
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b1eb50a..4cbb18f 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -408,23 +408,28 @@ static int ocfs2_mknod(struct inode *dir,
 		}
 	}
 
-	status = ocfs2_add_entry(handle, dentry, inode,
-				 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
-				 &lookup);
-	if (status < 0) {
+	/*
+	 * Do this before adding the entry to the directory. We add
+	 * also set d_op after success so that ->d_iput() will cleanup
+	 * the dentry lock even if ocfs2_add_entry() fails below.
+	 */
+	status = ocfs2_dentry_attach_lock(dentry, inode,
+					  OCFS2_I(dir)->ip_blkno);
+	if (status) {
 		mlog_errno(status);
 		goto leave;
 	}
+	dentry->d_op = &ocfs2_dentry_ops;
 
-	status = ocfs2_dentry_attach_lock(dentry, inode,
-					  OCFS2_I(dir)->ip_blkno);
-	if (status) {
+	status = ocfs2_add_entry(handle, dentry, inode,
+				 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
+				 &lookup);
+	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
 	}
 
 	insert_inode_hash(inode);
-	dentry->d_op = &ocfs2_dentry_ops;
 	d_instantiate(dentry, inode);
 	status = 0;
 leave:
@@ -445,11 +450,6 @@ leave:
 
 	ocfs2_free_dir_lookup_result(&lookup);
 
-	if ((status < 0) && inode) {
-		clear_nlink(inode);
-		iput(inode);
-	}
-
 	if (inode_ac)
 		ocfs2_free_alloc_context(inode_ac);
 
@@ -459,6 +459,17 @@ leave:
 	if (meta_ac)
 		ocfs2_free_alloc_context(meta_ac);
 
+	/*
+	 * We should call iput after the i_mutex of the bitmap been
+	 * unlocked in ocfs2_free_alloc_context, or the
+	 * ocfs2_delete_inode will mutex_lock again.
+	 */
+	if ((status < 0) && inode) {
+		OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
+		clear_nlink(inode);
+		iput(inode);
+	}
+
 	mlog_exit(status);
 
 	return status;
@@ -1771,22 +1782,27 @@ static int ocfs2_symlink(struct inode *dir,
 		}
 	}
 
-	status = ocfs2_add_entry(handle, dentry, inode,
-				 le64_to_cpu(fe->i_blkno), parent_fe_bh,
-				 &lookup);
-	if (status < 0) {
+	/*
+	 * Do this before adding the entry to the directory. We add
+	 * also set d_op after success so that ->d_iput() will cleanup
+	 * the dentry lock even if ocfs2_add_entry() fails below.
+	 */
+	status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
+	if (status) {
 		mlog_errno(status);
 		goto bail;
 	}
+	dentry->d_op = &ocfs2_dentry_ops;
 
-	status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
-	if (status) {
+	status = ocfs2_add_entry(handle, dentry, inode,
+				 le64_to_cpu(fe->i_blkno), parent_fe_bh,
+				 &lookup);
+	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
 
 	insert_inode_hash(inode);
-	dentry->d_op = &ocfs2_dentry_ops;
 	d_instantiate(dentry, inode);
 bail:
 	if (status < 0 && did_quota)
@@ -1811,6 +1827,7 @@ bail:
 	if (xattr_ac)
 		ocfs2_free_alloc_context(xattr_ac);
 	if ((status < 0) && inode) {
+		OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
 		clear_nlink(inode);
 		iput(inode);
 	}
@@ -1976,6 +1993,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 	}
 
 	le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
+	OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR;
 
 	/* Record which orphan dir our inode now resides
 	 * in. delete_inode will use this to determine which orphan
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index bd96f6c..5cbcd0f 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4083,6 +4083,9 @@ static int ocfs2_complete_reflink(struct inode *s_inode,
 	di->i_attr = s_di->i_attr;
 
 	if (preserve) {
+		t_inode->i_uid = s_inode->i_uid;
+		t_inode->i_gid = s_inode->i_gid;
+		t_inode->i_mode = s_inode->i_mode;
 		di->i_uid = s_di->i_uid;
 		di->i_gid = s_di->i_gid;
 		di->i_mode = s_di->i_mode;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 7621db8..8418fcc 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2909,7 +2909,7 @@ out_no_task:
  */
 static const struct pid_entry tid_base_stuff[] = {
 	DIR("fd",        S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
-	DIR("fdinfo",    S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fd_operations),
+	DIR("fdinfo",    S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
 	REG("environ",   S_IRUSR, proc_environ_operations),
 	INF("auxv",      S_IRUSR, proc_pid_auxv),
 	ONE("status",    S_IRUGO, proc_pid_status),
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index a05a669..0705534 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -662,31 +662,18 @@ static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset)
 	return pme;
 }
 
-static int pagemap_hugetlb_range(pte_t *pte, unsigned long addr,
-				 unsigned long end, struct mm_walk *walk)
+/* This function walks within one hugetlb entry in the single call */
+static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
+				 unsigned long addr, unsigned long end,
+				 struct mm_walk *walk)
 {
-	struct vm_area_struct *vma;
 	struct pagemapread *pm = walk->private;
-	struct hstate *hs = NULL;
 	int err = 0;
+	u64 pfn;
 
-	vma = find_vma(walk->mm, addr);
-	if (vma)
-		hs = hstate_vma(vma);
 	for (; addr != end; addr += PAGE_SIZE) {
-		u64 pfn = PM_NOT_PRESENT;
-
-		if (vma && (addr >= vma->vm_end)) {
-			vma = find_vma(walk->mm, addr);
-			if (vma)
-				hs = hstate_vma(vma);
-		}
-
-		if (vma && (vma->vm_start <= addr) && is_vm_hugetlb_page(vma)) {
-			/* calculate pfn of the "raw" page in the hugepage. */
-			int offset = (addr & ~huge_page_mask(hs)) >> PAGE_SHIFT;
-			pfn = huge_pte_to_pagemap_entry(*pte, offset);
-		}
+		int offset = (addr & ~hmask) >> PAGE_SHIFT;
+		pfn = huge_pte_to_pagemap_entry(*pte, offset);
 		err = add_to_pagemap(addr, pfn, pm);
 		if (err)
 			return err;
diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig
index dad7fb2..3e21b1e 100644
--- a/fs/quota/Kconfig
+++ b/fs/quota/Kconfig
@@ -33,6 +33,14 @@ config PRINT_QUOTA_WARNING
 	  Note that this behavior is currently deprecated and may go away in
 	  future. Please use notification via netlink socket instead.
 
+config QUOTA_DEBUG
+	bool "Additional quota sanity checks"
+	depends on QUOTA
+	default n
+	help
+	  If you say Y here, quota subsystem will perform some additional
+	  sanity checks of quota internal structures. If unsure, say N.
+
 # Generic support for tree structured quota files. Selected when needed.
 config QUOTA_TREE
 	 tristate
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index e0b870f..788b580 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -80,8 +80,6 @@
 
 #include <asm/uaccess.h>
 
-#define __DQUOT_PARANOIA
-
 /*
  * There are three quota SMP locks. dq_list_lock protects all lists with quotas
  * and quota formats, dqstats structure containing statistics about the lists
@@ -695,7 +693,7 @@ void dqput(struct dquot *dquot)
 
 	if (!dquot)
 		return;
-#ifdef __DQUOT_PARANOIA
+#ifdef CONFIG_QUOTA_DEBUG
 	if (!atomic_read(&dquot->dq_count)) {
 		printk("VFS: dqput: trying to free free dquot\n");
 		printk("VFS: device %s, dquot of %s %d\n",
@@ -748,7 +746,7 @@ we_slept:
 		goto we_slept;
 	}
 	atomic_dec(&dquot->dq_count);
-#ifdef __DQUOT_PARANOIA
+#ifdef CONFIG_QUOTA_DEBUG
 	/* sanity check */
 	BUG_ON(!list_empty(&dquot->dq_free));
 #endif
@@ -845,7 +843,7 @@ we_slept:
 		dquot = NULL;
 		goto out;
 	}
-#ifdef __DQUOT_PARANOIA
+#ifdef CONFIG_QUOTA_DEBUG
 	BUG_ON(!dquot->dq_sb);	/* Has somebody invalidated entry under us? */
 #endif
 out:
@@ -874,14 +872,18 @@ static int dqinit_needed(struct inode *inode, int type)
 static void add_dquot_ref(struct super_block *sb, int type)
 {
 	struct inode *inode, *old_inode = NULL;
+#ifdef CONFIG_QUOTA_DEBUG
 	int reserved = 0;
+#endif
 
 	spin_lock(&inode_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
 			continue;
+#ifdef CONFIG_QUOTA_DEBUG
 		if (unlikely(inode_get_rsv_space(inode) > 0))
 			reserved = 1;
+#endif
 		if (!atomic_read(&inode->i_writecount))
 			continue;
 		if (!dqinit_needed(inode, type))
@@ -903,11 +905,13 @@ static void add_dquot_ref(struct super_block *sb, int type)
 	spin_unlock(&inode_lock);
 	iput(old_inode);
 
+#ifdef CONFIG_QUOTA_DEBUG
 	if (reserved) {
 		printk(KERN_WARNING "VFS (%s): Writes happened before quota"
 			" was turned on thus quota information is probably "
 			"inconsistent. Please run quotacheck(8).\n", sb->s_id);
 	}
+#endif
 }
 
 /*
@@ -934,7 +938,7 @@ static int remove_inode_dquot_ref(struct inode *inode, int type,
 	inode->i_dquot[type] = NULL;
 	if (dquot) {
 		if (dqput_blocks(dquot)) {
-#ifdef __DQUOT_PARANOIA
+#ifdef CONFIG_QUOTA_DEBUG
 			if (atomic_read(&dquot->dq_count) != 1)
 				printk(KERN_WARNING "VFS: Adding dquot with dq_count %d to dispose list.\n", atomic_read(&dquot->dq_count));
 #endif
@@ -2322,34 +2326,34 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
 	if (di->dqb_valid & QIF_SPACE) {
 		dm->dqb_curspace = di->dqb_curspace - dm->dqb_rsvspace;
 		check_blim = 1;
-		__set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
+		set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
 	}
 	if (di->dqb_valid & QIF_BLIMITS) {
 		dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit);
 		dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit);
 		check_blim = 1;
-		__set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
+		set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
 	}
 	if (di->dqb_valid & QIF_INODES) {
 		dm->dqb_curinodes = di->dqb_curinodes;
 		check_ilim = 1;
-		__set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
+		set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
 	}
 	if (di->dqb_valid & QIF_ILIMITS) {
 		dm->dqb_isoftlimit = di->dqb_isoftlimit;
 		dm->dqb_ihardlimit = di->dqb_ihardlimit;
 		check_ilim = 1;
-		__set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
+		set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
 	}
 	if (di->dqb_valid & QIF_BTIME) {
 		dm->dqb_btime = di->dqb_btime;
 		check_blim = 1;
-		__set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
+		set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
 	}
 	if (di->dqb_valid & QIF_ITIME) {
 		dm->dqb_itime = di->dqb_itime;
 		check_ilim = 1;
-		__set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
+		set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
 	}
 
 	if (check_blim) {
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index f8a6075a..0793044 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -46,8 +46,6 @@ static inline bool is_privroot_deh(struct dentry *dir,
 				   struct reiserfs_de_head *deh)
 {
 	struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root;
-	if (reiserfs_expose_privroot(dir->d_sb))
-		return 0;
 	return (dir == dir->d_parent && privroot->d_inode &&
 	        deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid);
 }
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 4f9586b..e7cc00e 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -554,7 +554,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
 	if (!err && new_size < i_size_read(dentry->d_inode)) {
 		struct iattr newattrs = {
 			.ia_ctime = current_fs_time(inode->i_sb),
-			.ia_size = buffer_size,
+			.ia_size = new_size,
 			.ia_valid = ATTR_SIZE | ATTR_CTIME,
 		};
 
@@ -973,21 +973,13 @@ int reiserfs_permission(struct inode *inode, int mask)
 	return generic_permission(inode, mask, NULL);
 }
 
-/* This will catch lookups from the fs root to .reiserfs_priv */
-static int
-xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name)
+static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root;
-	if (container_of(q1, struct dentry, d_name) == priv_root)
-		return -ENOENT;
-	if (q1->len == name->len &&
-		   !memcmp(q1->name, name->name, name->len))
-		return 0;
-	return 1;
+	return -EPERM;
 }
 
 static const struct dentry_operations xattr_lookup_poison_ops = {
-	.d_compare = xattr_lookup_poison,
+	.d_revalidate = xattr_hide_revalidate,
 };
 
 int reiserfs_lookup_privroot(struct super_block *s)
@@ -1001,8 +993,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
 				strlen(PRIVROOT_NAME));
 	if (!IS_ERR(dentry)) {
 		REISERFS_SB(s)->priv_root = dentry;
-		if (!reiserfs_expose_privroot(s))
-			s->s_root->d_op = &xattr_lookup_poison_ops;
+		dentry->d_op = &xattr_lookup_poison_ops;
 		if (dentry->d_inode)
 			dentry->d_inode->i_flags |= S_PRIVATE;
 	} else
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 1c4c8f0..dfa1d67 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -479,6 +479,7 @@ smb_put_super(struct super_block *sb)
 	if (server->conn_pid)
 		kill_pid(server->conn_pid, SIGTERM, 1);
 
+	bdi_destroy(&server->bdi);
 	kfree(server->ops);
 	smb_unload_nls(server);
 	sb->s_fs_info = NULL;
@@ -525,6 +526,11 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
 	if (!server)
 		goto out_no_server;
 	sb->s_fs_info = server;
+	
+	if (bdi_setup_and_register(&server->bdi, "smbfs", BDI_CAP_MAP_COPY))
+		goto out_bdi;
+
+	sb->s_bdi = &server->bdi;
 
 	server->super_block = sb;
 	server->mnt = NULL;
@@ -624,6 +630,8 @@ out_no_smbiod:
 out_bad_option:
 	kfree(mem);
 out_no_mem:
+	bdi_destroy(&server->bdi);
+out_bdi:
 	if (!server->mnt)
 		printk(KERN_ERR "smb_fill_super: allocation failure\n");
 	sb->s_fs_info = NULL;
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 1cb0d81..653c030 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -87,9 +87,8 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
 	u64 cur_index = index >> msblk->devblksize_log2;
 	int bytes, compressed, b = 0, k = 0, page = 0, avail;
 
-
-	bh = kcalloc((msblk->block_size >> msblk->devblksize_log2) + 1,
-				sizeof(*bh), GFP_KERNEL);
+	bh = kcalloc(((srclength + msblk->devblksize - 1)
+		>> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
 	if (bh == NULL)
 		return -ENOMEM;
 
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 3550aec..48b6f4a 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -275,7 +275,8 @@ allocate_root:
 
 	err = squashfs_read_inode(root, root_inode);
 	if (err) {
-		iget_failed(root);
+		make_bad_inode(root);
+		iput(root);
 		goto failed_mount;
 	}
 	insert_inode_hash(root);
@@ -353,6 +354,7 @@ static void squashfs_put_super(struct super_block *sb)
 		kfree(sbi->id_table);
 		kfree(sbi->fragment_index);
 		kfree(sbi->meta_index);
+		kfree(sbi->inode_lookup_table);
 		kfree(sb->s_fs_info);
 		sb->s_fs_info = NULL;
 	}
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 15a03d0..7a60387 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -128,8 +128,9 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
 		goto release_mutex;
 	}
 
+	length = stream->total_out;
 	mutex_unlock(&msblk->read_data_mutex);
-	return stream->total_out;
+	return length;
 
 release_mutex:
 	mutex_unlock(&msblk->read_data_mutex);
diff --git a/fs/super.c b/fs/super.c
index f35ac60..1527e6a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -37,6 +37,7 @@
 #include <linux/kobject.h>
 #include <linux/mutex.h>
 #include <linux/file.h>
+#include <linux/backing-dev.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -693,6 +694,7 @@ int set_anon_super(struct super_block *s, void *data)
 		return -EMFILE;
 	}
 	s->s_dev = MKDEV(0, dev & MINORMASK);
+	s->s_bdi = &noop_backing_dev_info;
 	return 0;
 }
 
@@ -954,10 +956,11 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
 	if (error < 0)
 		goto out_free_secdata;
 	BUG_ON(!mnt->mnt_sb);
+	WARN_ON(!mnt->mnt_sb->s_bdi);
 
- 	error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
- 	if (error)
- 		goto out_sb;
+	error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
+	if (error)
+		goto out_sb;
 
 	/*
 	 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
diff --git a/fs/sync.c b/fs/sync.c
index fc5c3d7..92b2281 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -14,6 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
+#include <linux/backing-dev.h>
 #include "internal.h"
 
 #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
@@ -32,7 +33,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
 	 * This should be safe, as we require bdi backing to actually
 	 * write out data in the first place
 	 */
-	if (!sb->s_bdi)
+	if (!sb->s_bdi || sb->s_bdi == &noop_backing_dev_info)
 		return 0;
 
 	if (sb->s_qcop && sb->s_qcop->quota_sync)
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 19626e2..9a9378b 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -125,9 +125,8 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
 
 	mutex_lock(&sbi->s_alloc_mutex);
 	partmap = &sbi->s_partmaps[bloc->partitionReferenceNum];
-	if (bloc->logicalBlockNum < 0 ||
-	    (bloc->logicalBlockNum + count) >
-		partmap->s_partition_len) {
+	if (bloc->logicalBlockNum + count < count ||
+	    (bloc->logicalBlockNum + count) > partmap->s_partition_len) {
 		udf_debug("%d < %d || %d + %d > %d\n",
 			  bloc->logicalBlockNum, 0, bloc->logicalBlockNum,
 			  count, partmap->s_partition_len);
@@ -393,9 +392,8 @@ static void udf_table_free_blocks(struct super_block *sb,
 
 	mutex_lock(&sbi->s_alloc_mutex);
 	partmap = &sbi->s_partmaps[bloc->partitionReferenceNum];
-	if (bloc->logicalBlockNum < 0 ||
-	    (bloc->logicalBlockNum + count) >
-		partmap->s_partition_len) {
+	if (bloc->logicalBlockNum + count < count ||
+	    (bloc->logicalBlockNum + count) > partmap->s_partition_len) {
 		udf_debug("%d < %d || %d + %d > %d\n",
 			  bloc->logicalBlockNum, 0, bloc->logicalBlockNum, count,
 			  partmap->s_partition_len);
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 1eb0677..4b6a46c 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -218,7 +218,7 @@ const struct file_operations udf_file_operations = {
 	.llseek			= generic_file_llseek,
 };
 
-static int udf_setattr(struct dentry *dentry, struct iattr *iattr)
+int udf_setattr(struct dentry *dentry, struct iattr *iattr)
 {
 	struct inode *inode = dentry->d_inode;
 	int error;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index bb863fe..8a3fbd1 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1314,7 +1314,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 		break;
 	case ICBTAG_FILE_TYPE_SYMLINK:
 		inode->i_data.a_ops = &udf_symlink_aops;
-		inode->i_op = &page_symlink_inode_operations;
+		inode->i_op = &udf_symlink_inode_operations;
 		inode->i_mode = S_IFLNK | S_IRWXUGO;
 		break;
 	case ICBTAG_FILE_TYPE_MAIN:
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index db423ab..7581602 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -925,7 +925,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
 	iinfo = UDF_I(inode);
 	inode->i_mode = S_IFLNK | S_IRWXUGO;
 	inode->i_data.a_ops = &udf_symlink_aops;
-	inode->i_op = &page_symlink_inode_operations;
+	inode->i_op = &udf_symlink_inode_operations;
 
 	if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
 		struct kernel_lb_addr eloc;
@@ -1393,6 +1393,7 @@ const struct export_operations udf_export_ops = {
 const struct inode_operations udf_dir_inode_operations = {
 	.lookup				= udf_lookup,
 	.create				= udf_create,
+	.setattr			= udf_setattr,
 	.link				= udf_link,
 	.unlink				= udf_unlink,
 	.symlink			= udf_symlink,
@@ -1401,3 +1402,9 @@ const struct inode_operations udf_dir_inode_operations = {
 	.mknod				= udf_mknod,
 	.rename				= udf_rename,
 };
+const struct inode_operations udf_symlink_inode_operations = {
+	.readlink	= generic_readlink,
+	.follow_link	= page_follow_link_light,
+	.put_link	= page_put_link,
+	.setattr	= udf_setattr,
+};
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 4223ac8..702a114 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -76,6 +76,7 @@ extern const struct inode_operations udf_dir_inode_operations;
 extern const struct file_operations udf_dir_operations;
 extern const struct inode_operations udf_file_inode_operations;
 extern const struct file_operations udf_file_operations;
+extern const struct inode_operations udf_symlink_inode_operations;
 extern const struct address_space_operations udf_aops;
 extern const struct address_space_operations udf_adinicb_aops;
 extern const struct address_space_operations udf_symlink_aops;
@@ -131,7 +132,7 @@ extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
 /* file.c */
 extern int udf_ioctl(struct inode *, struct file *, unsigned int,
 		     unsigned long);
-
+extern int udf_setattr(struct dentry *dentry, struct iattr *iattr);
 /* inode.c */
 extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *);
 extern int udf_sync_inode(struct inode *);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 52e06b4..29f1edc 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1209,6 +1209,7 @@ xfs_fs_put_super(
 
 	xfs_unmountfs(mp);
 	xfs_freesb(mp);
+	xfs_inode_shrinker_unregister(mp);
 	xfs_icsb_destroy_counters(mp);
 	xfs_close_devices(mp);
 	xfs_dmops_put(mp);
@@ -1622,6 +1623,8 @@ xfs_fs_fill_super(
 	if (error)
 		goto fail_vnrele;
 
+	xfs_inode_shrinker_register(mp);
+
 	kfree(mtpt);
 	return 0;
 
@@ -1867,6 +1870,7 @@ init_xfs_fs(void)
 		goto out_cleanup_procfs;
 
 	vfs_initquota();
+	xfs_inode_shrinker_init();
 
 	error = register_filesystem(&xfs_fs_type);
 	if (error)
@@ -1894,6 +1898,7 @@ exit_xfs_fs(void)
 {
 	vfs_exitquota();
 	unregister_filesystem(&xfs_fs_type);
+	xfs_inode_shrinker_destroy();
 	xfs_sysctl_unregister();
 	xfs_cleanup_procfs();
 	xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 05cd853..a427c63 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -95,7 +95,8 @@ xfs_inode_ag_walk(
 					   struct xfs_perag *pag, int flags),
 	int			flags,
 	int			tag,
-	int			exclusive)
+	int			exclusive,
+	int			*nr_to_scan)
 {
 	uint32_t		first_index;
 	int			last_error = 0;
@@ -134,7 +135,7 @@ restart:
 		if (error == EFSCORRUPTED)
 			break;
 
-	} while (1);
+	} while ((*nr_to_scan)--);
 
 	if (skipped) {
 		delay(1);
@@ -150,12 +151,15 @@ xfs_inode_ag_iterator(
 					   struct xfs_perag *pag, int flags),
 	int			flags,
 	int			tag,
-	int			exclusive)
+	int			exclusive,
+	int			*nr_to_scan)
 {
 	int			error = 0;
 	int			last_error = 0;
 	xfs_agnumber_t		ag;
+	int			nr;
 
+	nr = nr_to_scan ? *nr_to_scan : INT_MAX;
 	for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
 		struct xfs_perag	*pag;
 
@@ -165,14 +169,18 @@ xfs_inode_ag_iterator(
 			continue;
 		}
 		error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
-						exclusive);
+						exclusive, &nr);
 		xfs_perag_put(pag);
 		if (error) {
 			last_error = error;
 			if (error == EFSCORRUPTED)
 				break;
 		}
+		if (nr <= 0)
+			break;
 	}
+	if (nr_to_scan)
+		*nr_to_scan = nr;
 	return XFS_ERROR(last_error);
 }
 
@@ -291,7 +299,7 @@ xfs_sync_data(
 	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
 
 	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
-				      XFS_ICI_NO_TAG, 0);
+				      XFS_ICI_NO_TAG, 0, NULL);
 	if (error)
 		return XFS_ERROR(error);
 
@@ -310,7 +318,7 @@ xfs_sync_attr(
 	ASSERT((flags & ~SYNC_WAIT) == 0);
 
 	return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
-				     XFS_ICI_NO_TAG, 0);
+				     XFS_ICI_NO_TAG, 0, NULL);
 }
 
 STATIC int
@@ -673,6 +681,7 @@ __xfs_inode_set_reclaim_tag(
 	radix_tree_tag_set(&pag->pag_ici_root,
 			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
 			   XFS_ICI_RECLAIM_TAG);
+	pag->pag_ici_reclaimable++;
 }
 
 /*
@@ -705,6 +714,7 @@ __xfs_inode_clear_reclaim_tag(
 {
 	radix_tree_tag_clear(&pag->pag_ici_root,
 			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+	pag->pag_ici_reclaimable--;
 }
 
 /*
@@ -820,10 +830,10 @@ xfs_reclaim_inode(
 	 * call into reclaim to find it in a clean state instead of waiting for
 	 * it now. We also don't return errors here - if the error is transient
 	 * then the next reclaim pass will flush the inode, and if the error
-	 * is permanent then the next sync reclaim will relcaim the inode and
+	 * is permanent then the next sync reclaim will reclaim the inode and
 	 * pass on the error.
 	 */
-	if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+	if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
 		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
 			"inode 0x%llx background reclaim flush failed with %d",
 			(long long)ip->i_ino, error);
@@ -854,5 +864,93 @@ xfs_reclaim_inodes(
 	int		mode)
 {
 	return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
-					XFS_ICI_RECLAIM_TAG, 1);
+					XFS_ICI_RECLAIM_TAG, 1, NULL);
+}
+
+/*
+ * Shrinker infrastructure.
+ *
+ * This is all far more complex than it needs to be. It adds a global list of
+ * mounts because the shrinkers can only call a global context. We need to make
+ * the shrinkers pass a context to avoid the need for global state.
+ */
+static LIST_HEAD(xfs_mount_list);
+static struct rw_semaphore xfs_mount_list_lock;
+
+static int
+xfs_reclaim_inode_shrink(
+	int		nr_to_scan,
+	gfp_t		gfp_mask)
+{
+	struct xfs_mount *mp;
+	struct xfs_perag *pag;
+	xfs_agnumber_t	ag;
+	int		reclaimable = 0;
+
+	if (nr_to_scan) {
+		if (!(gfp_mask & __GFP_FS))
+			return -1;
+
+		down_read(&xfs_mount_list_lock);
+		list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
+			xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
+					XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
+			if (nr_to_scan <= 0)
+				break;
+		}
+		up_read(&xfs_mount_list_lock);
+	}
+
+	down_read(&xfs_mount_list_lock);
+	list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
+		for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+
+			pag = xfs_perag_get(mp, ag);
+			if (!pag->pag_ici_init) {
+				xfs_perag_put(pag);
+				continue;
+			}
+			reclaimable += pag->pag_ici_reclaimable;
+			xfs_perag_put(pag);
+		}
+	}
+	up_read(&xfs_mount_list_lock);
+	return reclaimable;
+}
+
+static struct shrinker xfs_inode_shrinker = {
+	.shrink = xfs_reclaim_inode_shrink,
+	.seeks = DEFAULT_SEEKS,
+};
+
+void __init
+xfs_inode_shrinker_init(void)
+{
+	init_rwsem(&xfs_mount_list_lock);
+	register_shrinker(&xfs_inode_shrinker);
+}
+
+void
+xfs_inode_shrinker_destroy(void)
+{
+	ASSERT(list_empty(&xfs_mount_list));
+	unregister_shrinker(&xfs_inode_shrinker);
+}
+
+void
+xfs_inode_shrinker_register(
+	struct xfs_mount	*mp)
+{
+	down_write(&xfs_mount_list_lock);
+	list_add_tail(&mp->m_mplist, &xfs_mount_list);
+	up_write(&xfs_mount_list_lock);
+}
+
+void
+xfs_inode_shrinker_unregister(
+	struct xfs_mount	*mp)
+{
+	down_write(&xfs_mount_list_lock);
+	list_del(&mp->m_mplist);
+	up_write(&xfs_mount_list_lock);
 }
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index d480c34..cdcbaac 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -53,6 +53,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
 int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
 	int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
-	int flags, int tag, int write_lock);
+	int flags, int tag, int write_lock, int *nr_to_scan);
+
+void xfs_inode_shrinker_init(void);
+void xfs_inode_shrinker_destroy(void);
+void xfs_inode_shrinker_register(struct xfs_mount *mp);
+void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
 
 #endif
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 5d0ee8d..50bee07 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -891,7 +891,8 @@ xfs_qm_dqrele_all_inodes(
 	uint		 flags)
 {
 	ASSERT(mp->m_quotainfo);
-	xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0);
+	xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags,
+				XFS_ICI_NO_TAG, 0, NULL);
 }
 
 /*------------------------------------------------------------------------*/
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index b1a5a1f..abb8222 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -223,6 +223,7 @@ typedef struct xfs_perag {
 	int		pag_ici_init;	/* incore inode cache initialised */
 	rwlock_t	pag_ici_lock;	/* incore inode lock */
 	struct radix_tree_root pag_ici_root;	/* incore inode cache root */
+	int		pag_ici_reclaimable;	/* reclaimable inodes */
 #endif
 	int		pagb_count;	/* pagb slots in use */
 	xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS];	/* unstable blocks */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index cd27c9d..5bba29a 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -177,16 +177,26 @@ xfs_swap_extents_check_format(
 	    XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max)
 		return EINVAL;
 
-	/* Check root block of temp in btree form to max in target */
+	/*
+	 * If we are in a btree format, check that the temp root block will fit
+	 * in the target and that it has enough extents to be in btree format
+	 * in the target.
+	 *
+	 * Note that we have to be careful to allow btree->extent conversions
+	 * (a common defrag case) which will occur when the temp inode is in
+	 * extent format...
+	 */
 	if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
-	    XFS_IFORK_BOFF(ip) &&
-	    tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip))
+	    ((XFS_IFORK_BOFF(ip) &&
+	      tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) ||
+	     XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= ip->i_df.if_ext_max))
 		return EINVAL;
 
-	/* Check root block of target in btree form to max in temp */
+	/* Reciprocal target->temp btree format checks */
 	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
-	    XFS_IFORK_BOFF(tip) &&
-	    ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip))
+	    ((XFS_IFORK_BOFF(tip) &&
+	      ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) ||
+	     XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= tip->i_df.if_ext_max))
 		return EINVAL;
 
 	return 0;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index e8fba92..2be0191 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -745,9 +745,16 @@ xfs_log_move_tail(xfs_mount_t	*mp,
 
 /*
  * Determine if we have a transaction that has gone to disk
- * that needs to be covered. Log activity needs to be idle (no AIL and
- * nothing in the iclogs). And, we need to be in the right state indicating
- * something has gone out.
+ * that needs to be covered. To begin the transition to the idle state
+ * firstly the log needs to be idle (no AIL and nothing in the iclogs).
+ * If we are then in a state where covering is needed, the caller is informed
+ * that dummy transactions are required to move the log into the idle state.
+ *
+ * Because this is called as part of the sync process, we should also indicate
+ * that dummy transactions should be issued in anything but the covered or
+ * idle states. This ensures that the log tail is accurately reflected in
+ * the log at the end of the sync, hence if a crash occurrs avoids replay
+ * of transactions where the metadata is already on disk.
  */
 int
 xfs_log_need_covered(xfs_mount_t *mp)
@@ -759,17 +766,24 @@ xfs_log_need_covered(xfs_mount_t *mp)
 		return 0;
 
 	spin_lock(&log->l_icloglock);
-	if (((log->l_covered_state == XLOG_STATE_COVER_NEED) ||
-		(log->l_covered_state == XLOG_STATE_COVER_NEED2))
-			&& !xfs_trans_ail_tail(log->l_ailp)
-			&& xlog_iclogs_empty(log)) {
-		if (log->l_covered_state == XLOG_STATE_COVER_NEED)
-			log->l_covered_state = XLOG_STATE_COVER_DONE;
-		else {
-			ASSERT(log->l_covered_state == XLOG_STATE_COVER_NEED2);
-			log->l_covered_state = XLOG_STATE_COVER_DONE2;
+	switch (log->l_covered_state) {
+	case XLOG_STATE_COVER_DONE:
+	case XLOG_STATE_COVER_DONE2:
+	case XLOG_STATE_COVER_IDLE:
+		break;
+	case XLOG_STATE_COVER_NEED:
+	case XLOG_STATE_COVER_NEED2:
+		if (!xfs_trans_ail_tail(log->l_ailp) &&
+		    xlog_iclogs_empty(log)) {
+			if (log->l_covered_state == XLOG_STATE_COVER_NEED)
+				log->l_covered_state = XLOG_STATE_COVER_DONE;
+			else
+				log->l_covered_state = XLOG_STATE_COVER_DONE2;
 		}
+		/* FALLTHRU */
+	default:
 		needed = 1;
+		break;
 	}
 	spin_unlock(&log->l_icloglock);
 	return needed;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 4fa0bc7..9ff48a16 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -259,6 +259,7 @@ typedef struct xfs_mount {
 	wait_queue_head_t	m_wait_single_sync_task;
 	__int64_t		m_update_flags;	/* sb flags we need to update
 						   on the next remount,rw */
+	struct list_head	m_mplist;	/* inode shrinker mount list */
 } xfs_mount_t;
 
 /*