57 files changed, 903 insertions, 587 deletions
diff --git a/fs/bio.c b/fs/bio.c
index 41c93c7..b1fe82c 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -337,7 +337,7 @@ static void bio_fs_destructor(struct bio *bio)
  *	RETURNS:
  *	Pointer to new bio on success, NULL on failure.
  */
-struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
+struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 {
 	struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
 
@@ -365,7 +365,7 @@ static void bio_kmalloc_destructor(struct bio *bio)
  *   %__GFP_WAIT, the allocation is guaranteed to succeed.
  *
  **/
-struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
+struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 {
 	struct bio *bio;
 
@@ -696,7 +696,8 @@ static void bio_free_map_data(struct bio_map_data *bmd)
 	kfree(bmd);
 }
 
-static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
+static struct bio_map_data *bio_alloc_map_data(int nr_segs,
+					       unsigned int iov_count,
 					       gfp_t gfp_mask)
 {
 	struct bio_map_data *bmd;
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 8855aad..22c64ff 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -683,7 +683,7 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref,
 		return PTR_ERR(fspath);
 
 	if (fspath > fspath_min) {
-		ipath->fspath->val[i] = (u64)fspath;
+		ipath->fspath->val[i] = (u64)(unsigned long)fspath;
 		++ipath->fspath->elem_cnt;
 		ipath->fspath->bytes_left = fspath - fspath_min;
 	} else {
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 5a5d325..634608d2 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -147,14 +147,12 @@ struct btrfs_inode {
 	 * the btrfs file release call will add this inode to the
 	 * ordered operations list so that we make sure to flush out any
 	 * new data the application may have written before commit.
-	 *
-	 * yes, its silly to have a single bitflag, but we might grow more
-	 * of these.
 	 */
 	unsigned ordered_data_close:1;
 	unsigned orphan_meta_reserved:1;
 	unsigned dummy_inode:1;
 	unsigned in_defrag:1;
+	unsigned delalloc_meta_reserved:1;
 
 	/*
 	 * always compress this one file
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0fe615e..dede441 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -514,10 +514,25 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root,
 				   struct extent_buffer *buf)
 {
+	/* ensure we can see the force_cow */
+	smp_rmb();
+
+	/*
+	 * We do not need to cow a block if
+	 * 1) this block is not created or changed in this transaction;
+	 * 2) this block does not belong to TREE_RELOC tree;
+	 * 3) the root is not forced COW.
+	 *
+	 * What is forced COW:
+	 *    when we create snapshot during commiting the transaction,
+	 *    after we've finished coping src root, we must COW the shared
+	 *    block to ensure the metadata consistency.
+	 */
 	if (btrfs_header_generation(buf) == trans->transid &&
 	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
 	    !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
-	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
+	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
+	    !root->force_cow)
 		return 0;
 	return 1;
 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b9ba59f..04a5dfc 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -848,7 +848,8 @@ struct btrfs_free_cluster {
 enum btrfs_caching_type {
 	BTRFS_CACHE_NO		= 0,
 	BTRFS_CACHE_STARTED	= 1,
-	BTRFS_CACHE_FINISHED	= 2,
+	BTRFS_CACHE_FAST	= 2,
+	BTRFS_CACHE_FINISHED	= 3,
 };
 
 enum btrfs_disk_cache_state {
@@ -1271,6 +1272,8 @@ struct btrfs_root {
 	 * for stat.  It may be used for more later
 	 */
 	dev_t anon_dev;
+
+	int force_cow;
 };
 
 struct btrfs_ioctl_defrag_range_args {
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 3a1b939..5b16357 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -617,12 +617,14 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
 static int btrfs_delayed_inode_reserve_metadata(
 					struct btrfs_trans_handle *trans,
 					struct btrfs_root *root,
+					struct inode *inode,
 					struct btrfs_delayed_node *node)
 {
 	struct btrfs_block_rsv *src_rsv;
 	struct btrfs_block_rsv *dst_rsv;
 	u64 num_bytes;
 	int ret;
+	int release = false;
 
 	src_rsv = trans->block_rsv;
 	dst_rsv = &root->fs_info->delayed_block_rsv;
@@ -652,12 +654,65 @@ static int btrfs_delayed_inode_reserve_metadata(
 		if (!ret)
 			node->bytes_reserved = num_bytes;
 		return ret;
+	} else if (src_rsv == &root->fs_info->delalloc_block_rsv) {
+		spin_lock(&BTRFS_I(inode)->lock);
+		if (BTRFS_I(inode)->delalloc_meta_reserved) {
+			BTRFS_I(inode)->delalloc_meta_reserved = 0;
+			spin_unlock(&BTRFS_I(inode)->lock);
+			release = true;
+			goto migrate;
+		}
+		spin_unlock(&BTRFS_I(inode)->lock);
+
+		/* Ok we didn't have space pre-reserved.  This shouldn't happen
+		 * too often but it can happen if we do delalloc to an existing
+		 * inode which gets dirtied because of the time update, and then
+		 * isn't touched again until after the transaction commits and
+		 * then we try to write out the data.  First try to be nice and
+		 * reserve something strictly for us.  If not be a pain and try
+		 * to steal from the delalloc block rsv.
+		 */
+		ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
+		if (!ret)
+			goto out;
+
+		ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
+		if (!ret)
+			goto out;
+
+		/*
+		 * Ok this is a problem, let's just steal from the global rsv
+		 * since this really shouldn't happen that often.
+		 */
+		WARN_ON(1);
+		ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
+					      dst_rsv, num_bytes);
+		goto out;
 	}
 
+migrate:
 	ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
+
+out:
+	/*
+	 * Migrate only takes a reservation, it doesn't touch the size of the
+	 * block_rsv.  This is to simplify people who don't normally have things
+	 * migrated from their block rsv.  If they go to release their
+	 * reservation, that will decrease the size as well, so if migrate
+	 * reduced size we'd end up with a negative size.  But for the
+	 * delalloc_meta_reserved stuff we will only know to drop 1 reservation,
+	 * but we could in fact do this reserve/migrate dance several times
+	 * between the time we did the original reservation and we'd clean it
+	 * up.  So to take care of this, release the space for the meta
+	 * reservation here.  I think it may be time for a documentation page on
+	 * how block rsvs. work.
+	 */
 	if (!ret)
 		node->bytes_reserved = num_bytes;
 
+	if (release)
+		btrfs_block_rsv_release(root, src_rsv, num_bytes);
+
 	return ret;
 }
 
@@ -1708,7 +1763,8 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
 		goto release_node;
 	}
 
-	ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node);
+	ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode,
+						   delayed_node);
 	if (ret)
 		goto release_node;
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 102c176..632f8f3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -620,7 +620,7 @@ out:
 
 static int btree_io_failed_hook(struct bio *failed_bio,
 			 struct page *page, u64 start, u64 end,
-			 u64 mirror_num, struct extent_state *state)
+			 int mirror_num, struct extent_state *state)
 {
 	struct extent_io_tree *tree;
 	unsigned long len;
@@ -1890,31 +1890,32 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	u64 features;
 	struct btrfs_key location;
 	struct buffer_head *bh;
-	struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root),
-						 GFP_NOFS);
-	struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
-						 GFP_NOFS);
+	struct btrfs_super_block *disk_super;
 	struct btrfs_root *tree_root = btrfs_sb(sb);
-	struct btrfs_fs_info *fs_info = NULL;
-	struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
-						GFP_NOFS);
-	struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),
-					      GFP_NOFS);
+	struct btrfs_fs_info *fs_info = tree_root->fs_info;
+	struct btrfs_root *extent_root;
+	struct btrfs_root *csum_root;
+	struct btrfs_root *chunk_root;
+	struct btrfs_root *dev_root;
 	struct btrfs_root *log_tree_root;
-
 	int ret;
 	int err = -EINVAL;
 	int num_backups_tried = 0;
 	int backup_index = 0;
 
-	struct btrfs_super_block *disk_super;
+	extent_root = fs_info->extent_root =
+		kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+	csum_root = fs_info->csum_root =
+		kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+	chunk_root = fs_info->chunk_root =
+		kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+	dev_root = fs_info->dev_root =
+		kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
 
-	if (!extent_root || !tree_root || !tree_root->fs_info ||
-	    !chunk_root || !dev_root || !csum_root) {
+	if (!extent_root || !csum_root || !chunk_root || !dev_root) {
 		err = -ENOMEM;
 		goto fail;
 	}
-	fs_info = tree_root->fs_info;
 
 	ret = init_srcu_struct(&fs_info->subvol_srcu);
 	if (ret) {
@@ -1954,12 +1955,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	mutex_init(&fs_info->reloc_mutex);
 
 	init_completion(&fs_info->kobj_unregister);
-	fs_info->tree_root = tree_root;
-	fs_info->extent_root = extent_root;
-	fs_info->csum_root = csum_root;
-	fs_info->chunk_root = chunk_root;
-	fs_info->dev_root = dev_root;
-	fs_info->fs_devices = fs_devices;
 	INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
 	INIT_LIST_HEAD(&fs_info->space_info);
 	btrfs_mapping_init(&fs_info->mapping_tree);
@@ -2465,21 +2460,20 @@ fail_sb_buffer:
 	btrfs_stop_workers(&fs_info->caching_workers);
 fail_alloc:
 fail_iput:
+	btrfs_mapping_tree_free(&fs_info->mapping_tree);
+
 	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
 	iput(fs_info->btree_inode);
-
-	btrfs_close_devices(fs_info->fs_devices);
-	btrfs_mapping_tree_free(&fs_info->mapping_tree);
 fail_bdi:
 	bdi_destroy(&fs_info->bdi);
 fail_srcu:
 	cleanup_srcu_struct(&fs_info->subvol_srcu);
 fail:
+	btrfs_close_devices(fs_info->fs_devices);
 	free_fs_info(fs_info);
 	return ERR_PTR(err);
 
 recovery_tree_root:
-
 	if (!btrfs_test_opt(tree_root, RECOVERY))
 		goto fail_tree_roots;
 
@@ -2579,22 +2573,10 @@ static int write_dev_supers(struct btrfs_device *device,
 	int errors = 0;
 	u32 crc;
 	u64 bytenr;
-	int last_barrier = 0;
 
 	if (max_mirrors == 0)
 		max_mirrors = BTRFS_SUPER_MIRROR_MAX;
 
-	/* make sure only the last submit_bh does a barrier */
-	if (do_barriers) {
-		for (i = 0; i < max_mirrors; i++) {
-			bytenr = btrfs_sb_offset(i);
-			if (bytenr + BTRFS_SUPER_INFO_SIZE >=
-			    device->total_bytes)
-				break;
-			last_barrier = i;
-		}
-	}
-
 	for (i = 0; i < max_mirrors; i++) {
 		bytenr = btrfs_sb_offset(i);
 		if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
@@ -2640,17 +2622,136 @@ static int write_dev_supers(struct btrfs_device *device,
 			bh->b_end_io = btrfs_end_buffer_write_sync;
 		}
 
-		if (i == last_barrier && do_barriers)
-			ret = submit_bh(WRITE_FLUSH_FUA, bh);
-		else
-			ret = submit_bh(WRITE_SYNC, bh);
-
+		/*
+		 * we fua the first super.  The others we allow
+		 * to go down lazy.
+		 */
+		ret = submit_bh(WRITE_FUA, bh);
 		if (ret)
 			errors++;
 	}
 	return errors < i ? 0 : -1;
 }
 
+/*
+ * endio for the write_dev_flush, this will wake anyone waiting
+ * for the barrier when it is done
+ */
+static void btrfs_end_empty_barrier(struct bio *bio, int err)
+{
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+	}
+	if (bio->bi_private)
+		complete(bio->bi_private);
+	bio_put(bio);
+}
+
+/*
+ * trigger flushes for one the devices.  If you pass wait == 0, the flushes are
+ * sent down.  With wait == 1, it waits for the previous flush.
+ *
+ * any device where the flush fails with eopnotsupp are flagged as not-barrier
+ * capable
+ */
+static int write_dev_flush(struct btrfs_device *device, int wait)
+{
+	struct bio *bio;
+	int ret = 0;
+
+	if (device->nobarriers)
+		return 0;
+
+	if (wait) {
+		bio = device->flush_bio;
+		if (!bio)
+			return 0;
+
+		wait_for_completion(&device->flush_wait);
+
+		if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
+			printk("btrfs: disabling barriers on dev %s\n",
+			       device->name);
+			device->nobarriers = 1;
+		}
+		if (!bio_flagged(bio, BIO_UPTODATE)) {
+			ret = -EIO;
+		}
+
+		/* drop the reference from the wait == 0 run */
+		bio_put(bio);
+		device->flush_bio = NULL;
+
+		return ret;
+	}
+
+	/*
+	 * one reference for us, and we leave it for the
+	 * caller
+	 */
+	device->flush_bio = NULL;;
+	bio = bio_alloc(GFP_NOFS, 0);
+	if (!bio)
+		return -ENOMEM;
+
+	bio->bi_end_io = btrfs_end_empty_barrier;
+	bio->bi_bdev = device->bdev;
+	init_completion(&device->flush_wait);
+	bio->bi_private = &device->flush_wait;
+	device->flush_bio = bio;
+
+	bio_get(bio);
+	submit_bio(WRITE_FLUSH, bio);
+
+	return 0;
+}
+
+/*
+ * send an empty flush down to each device in parallel,
+ * then wait for them
+ */
+static int barrier_all_devices(struct btrfs_fs_info *info)
+{
+	struct list_head *head;
+	struct btrfs_device *dev;
+	int errors = 0;
+	int ret;
+
+	/* send down all the barriers */
+	head = &info->fs_devices->devices;
+	list_for_each_entry_rcu(dev, head, dev_list) {
+		if (!dev->bdev) {
+			errors++;
+			continue;
+		}
+		if (!dev->in_fs_metadata || !dev->writeable)
+			continue;
+
+		ret = write_dev_flush(dev, 0);
+		if (ret)
+			errors++;
+	}
+
+	/* wait for all the barriers */
+	list_for_each_entry_rcu(dev, head, dev_list) {
+		if (!dev->bdev) {
+			errors++;
+			continue;
+		}
+		if (!dev->in_fs_metadata || !dev->writeable)
+			continue;
+
+		ret = write_dev_flush(dev, 1);
+		if (ret)
+			errors++;
+	}
+	if (errors)
+		return -EIO;
+	return 0;
+}
+
 int write_all_supers(struct btrfs_root *root, int max_mirrors)
 {
 	struct list_head *head;
@@ -2672,6 +2773,10 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
 
 	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
 	head = &root->fs_info->fs_devices->devices;
+
+	if (do_barriers)
+		barrier_all_devices(root->fs_info);
+
 	list_for_each_entry_rcu(dev, head, dev_list) {
 		if (!dev->bdev) {
 			total_errors++;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9879bd4..930ae89 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -467,13 +467,59 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 			     struct btrfs_root *root,
 			     int load_cache_only)
 {
+	DEFINE_WAIT(wait);
 	struct btrfs_fs_info *fs_info = cache->fs_info;
 	struct btrfs_caching_control *caching_ctl;
 	int ret = 0;
 
-	smp_mb();
-	if (cache->cached != BTRFS_CACHE_NO)
+	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
+	BUG_ON(!caching_ctl);
+
+	INIT_LIST_HEAD(&caching_ctl->list);
+	mutex_init(&caching_ctl->mutex);
+	init_waitqueue_head(&caching_ctl->wait);
+	caching_ctl->block_group = cache;
+	caching_ctl->progress = cache->key.objectid;
+	atomic_set(&caching_ctl->count, 1);
+	caching_ctl->work.func = caching_thread;
+
+	spin_lock(&cache->lock);
+	/*
+	 * This should be a rare occasion, but this could happen I think in the
+	 * case where one thread starts to load the space cache info, and then
+	 * some other thread starts a transaction commit which tries to do an
+	 * allocation while the other thread is still loading the space cache
+	 * info.  The previous loop should have kept us from choosing this block
+	 * group, but if we've moved to the state where we will wait on caching
+	 * block groups we need to first check if we're doing a fast load here,
+	 * so we can wait for it to finish, otherwise we could end up allocating
+	 * from a block group who's cache gets evicted for one reason or
+	 * another.
+	 */
+	while (cache->cached == BTRFS_CACHE_FAST) {
+		struct btrfs_caching_control *ctl;
+
+		ctl = cache->caching_ctl;
+		atomic_inc(&ctl->count);
+		prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
+		spin_unlock(&cache->lock);
+
+		schedule();
+
+		finish_wait(&ctl->wait, &wait);
+		put_caching_control(ctl);
+		spin_lock(&cache->lock);
+	}
+
+	if (cache->cached != BTRFS_CACHE_NO) {
+		spin_unlock(&cache->lock);
+		kfree(caching_ctl);
 		return 0;
+	}
+	WARN_ON(cache->caching_ctl);
+	cache->caching_ctl = caching_ctl;
+	cache->cached = BTRFS_CACHE_FAST;
+	spin_unlock(&cache->lock);
 
 	/*
 	 * We can't do the read from on-disk cache during a commit since we need
@@ -484,56 +530,51 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 	if (trans && (!trans->transaction->in_commit) &&
 	    (root && root != root->fs_info->tree_root) &&
 	    btrfs_test_opt(root, SPACE_CACHE)) {
-		spin_lock(&cache->lock);
-		if (cache->cached != BTRFS_CACHE_NO) {
-			spin_unlock(&cache->lock);
-			return 0;
-		}
-		cache->cached = BTRFS_CACHE_STARTED;
-		spin_unlock(&cache->lock);
-
 		ret = load_free_space_cache(fs_info, cache);
 
 		spin_lock(&cache->lock);
 		if (ret == 1) {
+			cache->caching_ctl = NULL;
 			cache->cached = BTRFS_CACHE_FINISHED;
 			cache->last_byte_to_unpin = (u64)-1;
 		} else {
-			cache->cached = BTRFS_CACHE_NO;
+			if (load_cache_only) {
+				cache->caching_ctl = NULL;
+				cache->cached = BTRFS_CACHE_NO;
+			} else {
+				cache->cached = BTRFS_CACHE_STARTED;
+			}
 		}
 		spin_unlock(&cache->lock);
+		wake_up(&caching_ctl->wait);
 		if (ret == 1) {
+			put_caching_control(caching_ctl);
 			free_excluded_extents(fs_info->extent_root, cache);
 			return 0;
 		}
+	} else {
+		/*
+		 * We are not going to do the fast caching, set cached to the
+		 * appropriate value and wakeup any waiters.
+		 */
+		spin_lock(&cache->lock);
+		if (load_cache_only) {
+			cache->caching_ctl = NULL;
+			cache->cached = BTRFS_CACHE_NO;
+		} else {
+			cache->cached = BTRFS_CACHE_STARTED;
+		}
+		spin_unlock(&cache->lock);
+		wake_up(&caching_ctl->wait);
 	}
 
-	if (load_cache_only)
-		return 0;
-
-	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
-	BUG_ON(!caching_ctl);
-
-	INIT_LIST_HEAD(&caching_ctl->list);
-	mutex_init(&caching_ctl->mutex);
-	init_waitqueue_head(&caching_ctl->wait);
-	caching_ctl->block_group = cache;
-	caching_ctl->progress = cache->key.objectid;
-	/* one for caching kthread, one for caching block group list */
-	atomic_set(&caching_ctl->count, 2);
-	caching_ctl->work.func = caching_thread;
-
-	spin_lock(&cache->lock);
-	if (cache->cached != BTRFS_CACHE_NO) {
-		spin_unlock(&cache->lock);
-		kfree(caching_ctl);
+	if (load_cache_only) {
+		put_caching_control(caching_ctl);
 		return 0;
 	}
-	cache->caching_ctl = caching_ctl;
-	cache->cached = BTRFS_CACHE_STARTED;
-	spin_unlock(&cache->lock);
 
 	down_write(&fs_info->extent_commit_sem);
+	atomic_inc(&caching_ctl->count);
 	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
 	up_write(&fs_info->extent_commit_sem);
 
@@ -3797,16 +3838,16 @@ void btrfs_free_block_rsv(struct btrfs_root *root,
 	kfree(rsv);
 }
 
-int btrfs_block_rsv_add(struct btrfs_root *root,
-			struct btrfs_block_rsv *block_rsv,
-			u64 num_bytes)
+static inline int __block_rsv_add(struct btrfs_root *root,
+				  struct btrfs_block_rsv *block_rsv,
+				  u64 num_bytes, int flush)
 {
 	int ret;
 
 	if (num_bytes == 0)
 		return 0;
 
-	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1);
+	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
 	if (!ret) {
 		block_rsv_add_bytes(block_rsv, num_bytes, 1);
 		return 0;
@@ -3815,22 +3856,18 @@ int btrfs_block_rsv_add(struct btrfs_root *root,
 	return ret;
 }
 
+int btrfs_block_rsv_add(struct btrfs_root *root,
+			struct btrfs_block_rsv *block_rsv,
+			u64 num_bytes)
+{
+	return __block_rsv_add(root, block_rsv, num_bytes, 1);
+}
+
 int btrfs_block_rsv_add_noflush(struct btrfs_root *root,
 				struct btrfs_block_rsv *block_rsv,
 				u64 num_bytes)
 {
-	int ret;
-
-	if (num_bytes == 0)
-		return 0;
-
-	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 0);
-	if (!ret) {
-		block_rsv_add_bytes(block_rsv, num_bytes, 1);
-		return 0;
-	}
-
-	return ret;
+	return __block_rsv_add(root, block_rsv, num_bytes, 0);
 }
 
 int btrfs_block_rsv_check(struct btrfs_root *root,
@@ -4064,23 +4101,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
  */
 static unsigned drop_outstanding_extent(struct inode *inode)
 {
+	unsigned drop_inode_space = 0;
 	unsigned dropped_extents = 0;
 
 	BUG_ON(!BTRFS_I(inode)->outstanding_extents);
 	BTRFS_I(inode)->outstanding_extents--;
 
+	if (BTRFS_I(inode)->outstanding_extents == 0 &&
+	    BTRFS_I(inode)->delalloc_meta_reserved) {
+		drop_inode_space = 1;
+		BTRFS_I(inode)->delalloc_meta_reserved = 0;
+	}
+
 	/*
 	 * If we have more or the same amount of outsanding extents than we have
 	 * reserved then we need to leave the reserved extents count alone.
 	 */
 	if (BTRFS_I(inode)->outstanding_extents >=
 	    BTRFS_I(inode)->reserved_extents)
-		return 0;
+		return drop_inode_space;
 
 	dropped_extents = BTRFS_I(inode)->reserved_extents -
 		BTRFS_I(inode)->outstanding_extents;
 	BTRFS_I(inode)->reserved_extents -= dropped_extents;
-	return dropped_extents;
+	return dropped_extents + drop_inode_space;
 }
 
 /**
@@ -4166,9 +4210,18 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 		nr_extents = BTRFS_I(inode)->outstanding_extents -
 			BTRFS_I(inode)->reserved_extents;
 		BTRFS_I(inode)->reserved_extents += nr_extents;
+	}
 
-		to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
+	/*
+	 * Add an item to reserve for updating the inode when we complete the
+	 * delalloc io.
+	 */
+	if (!BTRFS_I(inode)->delalloc_meta_reserved) {
+		nr_extents++;
+		BTRFS_I(inode)->delalloc_meta_reserved = 1;
 	}
+
+	to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
 	to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
 	spin_unlock(&BTRFS_I(inode)->lock);
 
@@ -5166,13 +5219,15 @@ search:
 		}
 
 have_block_group:
-		if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
+		cached = block_group_cache_done(block_group);
+		if (unlikely(!cached)) {
 			u64 free_percent;
 
+			found_uncached_bg = true;
 			ret = cache_block_group(block_group, trans,
 						orig_root, 1);
 			if (block_group->cached == BTRFS_CACHE_FINISHED)
-				goto have_block_group;
+				goto alloc;
 
 			free_percent = btrfs_block_group_used(&block_group->item);
 			free_percent *= 100;
@@ -5194,7 +5249,6 @@ have_block_group:
 							orig_root, 0);
 				BUG_ON(ret);
 			}
-			found_uncached_bg = true;
 
 			/*
 			 * If loop is set for cached only, try the next block
@@ -5204,10 +5258,7 @@ have_block_group:
 				goto loop;
 		}
 
-		cached = block_group_cache_done(block_group);
-		if (unlikely(!cached))
-			found_uncached_bg = true;
-
+alloc:
 		if (unlikely(block_group->ro))
 			goto loop;
 
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 1f87c4d..9472d3d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2285,8 +2285,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 				clean_io_failure(start, page);
 		}
 		if (!uptodate) {
-			u64 failed_mirror;
-			failed_mirror = (u64)bio->bi_bdev;
+			int failed_mirror;
+			failed_mirror = (int)(unsigned long)bio->bi_bdev;
 			if (tree->ops && tree->ops->readpage_io_failed_hook)
 				ret = tree->ops->readpage_io_failed_hook(
 						bio, page, start, end,
@@ -3366,6 +3366,9 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		return -ENOMEM;
 	path->leave_spinning = 1;
 
+	start = ALIGN(start, BTRFS_I(inode)->root->sectorsize);
+	len = ALIGN(len, BTRFS_I(inode)->root->sectorsize);
+
 	/*
 	 * lookup the last file extent.  We're not using i_size here
 	 * because there might be preallocation past i_size
@@ -3413,7 +3416,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 	lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
 			 &cached_state, GFP_NOFS);
 
-	em = get_extent_skip_holes(inode, off, last_for_get_extent,
+	em = get_extent_skip_holes(inode, start, last_for_get_extent,
 				   get_extent);
 	if (!em)
 		goto out;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index feb9be0..7604c30 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -70,7 +70,7 @@ struct extent_io_ops {
 			      unsigned long bio_flags);
 	int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
 	int (*readpage_io_failed_hook)(struct bio *bio, struct page *page,
-				       u64 start, u64 end, u64 failed_mirror,
+				       u64 start, u64 end, int failed_mirror,
 				       struct extent_state *state);
 	int (*writepage_io_failed_hook)(struct bio *bio, struct page *page,
 					u64 start, u64 end,
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 7a15fcf..6e5b7e4 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -351,6 +351,11 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
 		}
 	}
 
+	for (i = 0; i < io_ctl->num_pages; i++) {
+		clear_page_dirty_for_io(io_ctl->pages[i]);
+		set_page_extent_mapped(io_ctl->pages[i]);
+	}
+
 	return 0;
 }
 
@@ -537,6 +542,13 @@ static int io_ctl_read_entry(struct io_ctl *io_ctl,
 			    struct btrfs_free_space *entry, u8 *type)
 {
 	struct btrfs_free_space_entry *e;
+	int ret;
+
+	if (!io_ctl->cur) {
+		ret = io_ctl_check_crc(io_ctl, io_ctl->index);
+		if (ret)
+			return ret;
+	}
 
 	e = io_ctl->cur;
 	entry->offset = le64_to_cpu(e->offset);
@@ -550,10 +562,7 @@ static int io_ctl_read_entry(struct io_ctl *io_ctl,
 
 	io_ctl_unmap_page(io_ctl);
 
-	if (io_ctl->index >= io_ctl->num_pages)
-		return 0;
-
-	return io_ctl_check_crc(io_ctl, io_ctl->index);
+	return 0;
 }
 
 static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
@@ -561,9 +570,6 @@ static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
 {
 	int ret;
 
-	if (io_ctl->cur && io_ctl->cur != io_ctl->orig)
-		io_ctl_unmap_page(io_ctl);
-
 	ret = io_ctl_check_crc(io_ctl, io_ctl->index);
 	if (ret)
 		return ret;
@@ -699,6 +705,8 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
 		num_entries--;
 	}
 
+	io_ctl_unmap_page(&io_ctl);
+
 	/*
 	 * We add the bitmaps at the end of the entries in order that
 	 * the bitmap entries are added to the cache.
@@ -1841,7 +1849,13 @@ again:
 		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
 					  1, 0);
 		if (!info) {
-			WARN_ON(1);
+			/* the tree logging code might be calling us before we
+			 * have fully loaded the free space rbtree for this
+			 * block group.  So it is possible the entry won't
+			 * be in the rbtree yet at all.  The caching code
+			 * will make sure not to put it in the rbtree if
+			 * the logging code has pinned it.
+			 */
 			goto out_lock;
 		}
 	}
@@ -2448,16 +2462,23 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
 {
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 	struct btrfs_free_space *entry;
-	struct rb_node *node;
 	int ret = -ENOSPC;
+	u64 bitmap_offset = offset_to_bitmap(ctl, offset);
 
 	if (ctl->total_bitmaps == 0)
 		return -ENOSPC;
 
 	/*
-	 * First check our cached list of bitmaps and see if there is an entry
-	 * here that will work.
+	 * The bitmap that covers offset won't be in the list unless offset
+	 * is just its start offset.
 	 */
+	entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
+	if (entry->offset != bitmap_offset) {
+		entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
+		if (entry && list_empty(&entry->list))
+			list_add(&entry->list, bitmaps);
+	}
+
 	list_for_each_entry(entry, bitmaps, list) {
 		if (entry->bytes < min_bytes)
 			continue;
@@ -2468,38 +2489,10 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
 	}
 
 	/*
-	 * If we do have entries on our list and we are here then we didn't find
-	 * anything, so go ahead and get the next entry after the last entry in
-	 * this list and start the search from there.
+	 * The bitmaps list has all the bitmaps that record free space
+	 * starting after offset, so no more search is required.
 	 */
-	if (!list_empty(bitmaps)) {
-		entry = list_entry(bitmaps->prev, struct btrfs_free_space,
-				   list);
-		node = rb_next(&entry->offset_index);
-		if (!node)
-			return -ENOSPC;
-		entry = rb_entry(node, struct btrfs_free_space, offset_index);
-		goto search;
-	}
-
-	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
-	if (!entry)
-		return -ENOSPC;
-
-search:
-	node = &entry->offset_index;
-	do {
-		entry = rb_entry(node, struct btrfs_free_space, offset_index);
-		node = rb_next(&entry->offset_index);
-		if (!entry->bitmap)
-			continue;
-		if (entry->bytes < min_bytes)
-			continue;
-		ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
-					   bytes, min_bytes);
-	} while (ret && node);
-
-	return ret;
+	return -ENOSPC;
 }
 
 /*
@@ -2517,8 +2510,8 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
 			     u64 offset, u64 bytes, u64 empty_size)
 {
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
-	struct list_head bitmaps;
 	struct btrfs_free_space *entry, *tmp;
+	LIST_HEAD(bitmaps);
 	u64 min_bytes;
 	int ret;
 
@@ -2557,7 +2550,6 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
 		goto out;
 	}
 
-	INIT_LIST_HEAD(&bitmaps);
 	ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
 				      bytes, min_bytes);
 	if (ret)
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 53dcbdf..f8962a9 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -398,6 +398,8 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
 	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
 	struct btrfs_path *path;
 	struct inode *inode;
+	struct btrfs_block_rsv *rsv;
+	u64 num_bytes;
 	u64 alloc_hint = 0;
 	int ret;
 	int prealloc;
@@ -421,11 +423,26 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
 	if (!path)
 		return -ENOMEM;
 
+	rsv = trans->block_rsv;
+	trans->block_rsv = &root->fs_info->trans_block_rsv;
+
+	num_bytes = trans->bytes_reserved;
+	/*
+	 * 1 item for inode item insertion if need
+	 * 3 items for inode item update (in the worst case)
+	 * 1 item for free space object
+	 * 3 items for pre-allocation
+	 */
+	trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8);
+	ret = btrfs_block_rsv_add_noflush(root, trans->block_rsv,
+					  trans->bytes_reserved);
+	if (ret)
+		goto out;
 again:
 	inode = lookup_free_ino_inode(root, path);
 	if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
 		ret = PTR_ERR(inode);
-		goto out;
+		goto out_release;
 	}
 
 	if (IS_ERR(inode)) {
@@ -434,7 +451,7 @@ again:
 
 		ret = create_free_ino_inode(root, trans, path);
 		if (ret)
-			goto out;
+			goto out_release;
 		goto again;
 	}
 
@@ -477,11 +494,14 @@ again:
 	}
 	btrfs_free_reserved_data_space(inode, prealloc);
 
+	ret = btrfs_write_out_ino_cache(root, trans, path);
 out_put:
 	iput(inode);
+out_release:
+	btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
 out:
-	if (ret == 0)
-		ret = btrfs_write_out_ino_cache(root, trans, path);
+	trans->block_rsv = rsv;
+	trans->bytes_reserved = num_bytes;
 
 	btrfs_free_path(path);
 	return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 966ddcc..526dd51 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -93,6 +93,8 @@ static noinline int cow_file_range(struct inode *inode,
 				   struct page *locked_page,
 				   u64 start, u64 end, int *page_started,
 				   unsigned long *nr_written, int unlock);
+static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root, struct inode *inode);
 
 static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
 				     struct inode *inode,  struct inode *dir,
@@ -1741,7 +1743,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 				trans = btrfs_join_transaction(root);
 			BUG_ON(IS_ERR(trans));
 			trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-			ret = btrfs_update_inode(trans, root, inode);
+			ret = btrfs_update_inode_fallback(trans, root, inode);
 			BUG_ON(ret);
 		}
 		goto out;
@@ -1791,7 +1793,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 
 	ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
 	if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
-		ret = btrfs_update_inode(trans, root, inode);
+		ret = btrfs_update_inode_fallback(trans, root, inode);
 		BUG_ON(ret);
 	}
 	ret = 0;
@@ -2199,6 +2201,9 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
 		if (ret)
 			goto out;
 	}
+	/* release the path since we're done with it */
+	btrfs_release_path(path);
+
 	root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
 
 	if (root->orphan_block_rsv)
@@ -2426,7 +2431,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
 /*
  * copy everything in the in-memory inode into the btree.
  */
-noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
+static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root, struct inode *inode)
 {
 	struct btrfs_inode_item *inode_item;
@@ -2434,21 +2439,6 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
 	struct extent_buffer *leaf;
 	int ret;
 
-	/*
-	 * If the inode is a free space inode, we can deadlock during commit
-	 * if we put it into the delayed code.
-	 *
-	 * The data relocation inode should also be directly updated
-	 * without delay
-	 */
-	if (!btrfs_is_free_space_inode(root, inode)
-	    && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
-		ret = btrfs_delayed_update_inode(trans, root, inode);
-		if (!ret)
-			btrfs_set_inode_last_trans(trans, inode);
-		return ret;
-	}
-
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -2477,6 +2467,43 @@ failed:
 }
 
 /*
+ * copy everything in the in-memory inode into the btree.
+ */
+noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root, struct inode *inode)
+{
+	int ret;
+
+	/*
+	 * If the inode is a free space inode, we can deadlock during commit
+	 * if we put it into the delayed code.
+	 *
+	 * The data relocation inode should also be directly updated
+	 * without delay
+	 */
+	if (!btrfs_is_free_space_inode(root, inode)
+	    && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
+		ret = btrfs_delayed_update_inode(trans, root, inode);
+		if (!ret)
+			btrfs_set_inode_last_trans(trans, inode);
+		return ret;
+	}
+
+	return btrfs_update_inode_item(trans, root, inode);
+}
+
+static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root, struct inode *inode)
+{
+	int ret;
+
+	ret = btrfs_update_inode(trans, root, inode);
+	if (ret == -ENOSPC)
+		return btrfs_update_inode_item(trans, root, inode);
+	return ret;
+}
+
+/*
  * unlink helper that gets used here in inode.c and in the tree logging
  * recovery code.  It remove a link in a directory with a given name, and
  * also drops the back refs in the inode to the directory
@@ -5632,7 +5659,7 @@ again:
 	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
 		ret = btrfs_ordered_update_i_size(inode, 0, ordered);
 		if (!ret)
-			err = btrfs_update_inode(trans, root, inode);
+			err = btrfs_update_inode_fallback(trans, root, inode);
 		goto out;
 	}
 
@@ -5670,7 +5697,7 @@ again:
 	add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
 	ret = btrfs_ordered_update_i_size(inode, 0, ordered);
 	if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags))
-		btrfs_update_inode(trans, root, inode);
+		btrfs_update_inode_fallback(trans, root, inode);
 	ret = 0;
 out_unlock:
 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
@@ -6529,14 +6556,16 @@ end_trans:
 		ret = btrfs_orphan_del(NULL, inode);
 	}
 
-	trans->block_rsv = &root->fs_info->trans_block_rsv;
-	ret = btrfs_update_inode(trans, root, inode);
-	if (ret && !err)
-		err = ret;
+	if (trans) {
+		trans->block_rsv = &root->fs_info->trans_block_rsv;
+		ret = btrfs_update_inode(trans, root, inode);
+		if (ret && !err)
+			err = ret;
 
-	nr = trans->blocks_used;
-	ret = btrfs_end_transaction_throttle(trans, root);
-	btrfs_btree_balance_dirty(root, nr);
+		nr = trans->blocks_used;
+		ret = btrfs_end_transaction_throttle(trans, root);
+		btrfs_btree_balance_dirty(root, nr);
+	}
 
 out:
 	btrfs_free_block_rsv(root, rsv);
@@ -6605,6 +6634,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 	ei->orphan_meta_reserved = 0;
 	ei->dummy_inode = 0;
 	ei->in_defrag = 0;
+	ei->delalloc_meta_reserved = 0;
 	ei->force_compress = BTRFS_COMPRESS_NONE;
 
 	ei->delayed_node = NULL;
@@ -6764,11 +6794,13 @@ static int btrfs_getattr(struct vfsmount *mnt,
 			 struct dentry *dentry, struct kstat *stat)
 {
 	struct inode *inode = dentry->d_inode;
+	u32 blocksize = inode->i_sb->s_blocksize;
+
 	generic_fillattr(inode, stat);
 	stat->dev = BTRFS_I(inode)->root->anon_dev;
 	stat->blksize = PAGE_CACHE_SIZE;
-	stat->blocks = (inode_get_bytes(inode) +
-			BTRFS_I(inode)->delalloc_bytes) >> 9;
+	stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
+		ALIGN(BTRFS_I(inode)->delalloc_bytes, blocksize)) >> 9;
 	return 0;
 }
 
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4a34c47..a90e749 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1216,12 +1216,12 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
 		*devstr = '\0';
 		devstr = vol_args->name;
 		devid = simple_strtoull(devstr, &end, 10);
-		printk(KERN_INFO "resizing devid %llu\n",
+		printk(KERN_INFO "btrfs: resizing devid %llu\n",
 		       (unsigned long long)devid);
 	}
 	device = btrfs_find_device(root, devid, NULL, NULL);
 	if (!device) {
-		printk(KERN_INFO "resizer unable to find device %llu\n",
+		printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
 		       (unsigned long long)devid);
 		ret = -EINVAL;
 		goto out_unlock;
@@ -1267,7 +1267,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
 	do_div(new_size, root->sectorsize);
 	new_size *= root->sectorsize;
 
-	printk(KERN_INFO "new size for %s is %llu\n",
+	printk(KERN_INFO "btrfs: new size for %s is %llu\n",
 		device->name, (unsigned long long)new_size);
 
 	if (new_size > old_size) {
@@ -2930,11 +2930,13 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
 		goto out;
 
 	for (i = 0; i < ipath->fspath->elem_cnt; ++i) {
-		rel_ptr = ipath->fspath->val[i] - (u64)ipath->fspath->val;
+		rel_ptr = ipath->fspath->val[i] -
+			  (u64)(unsigned long)ipath->fspath->val;
 		ipath->fspath->val[i] = rel_ptr;
 	}
 
-	ret = copy_to_user((void *)ipa->fspath, (void *)ipath->fspath, size);
+	ret = copy_to_user((void *)(unsigned long)ipa->fspath,
+			   (void *)(unsigned long)ipath->fspath, size);
 	if (ret) {
 		ret = -EFAULT;
 		goto out;
@@ -3017,7 +3019,8 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
 	if (ret < 0)
 		goto out;
 
-	ret = copy_to_user((void *)loi->inodes, (void *)inodes, size);
+	ret = copy_to_user((void *)(unsigned long)loi->inodes,
+			   (void *)(unsigned long)inodes, size);
 	if (ret)
 		ret = -EFAULT;
 
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 24d654c..dff29d5 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1174,6 +1174,8 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
 			list_add_tail(&new_edge->list[UPPER],
 				      &new_node->lower);
 		}
+	} else {
+		list_add_tail(&new_node->lower, &cache->leaves);
 	}
 
 	rb_node = tree_insert(&cache->rb_root, new_node->bytenr,
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ed11d38..fab420d 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -272,7 +272,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
 			swarn->logical, swarn->dev->name,
 			(unsigned long long)swarn->sector, root, inum, offset,
 			min(isize - offset, (u64)PAGE_SIZE), nlink,
-			(char *)ipath->fspath->val[i]);
+			(char *)(unsigned long)ipath->fspath->val[i]);
 
 	free_ipath(ipath);
 	return 0;
@@ -944,50 +944,18 @@ static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
 static int scrub_submit(struct scrub_dev *sdev)
 {
 	struct scrub_bio *sbio;
-	struct bio *bio;
-	int i;
 
 	if (sdev->curr == -1)
 		return 0;
 
 	sbio = sdev->bios[sdev->curr];
-
-	bio = bio_alloc(GFP_NOFS, sbio->count);
-	if (!bio)
-		goto nomem;
-
-	bio->bi_private = sbio;
-	bio->bi_end_io = scrub_bio_end_io;
-	bio->bi_bdev = sdev->dev->bdev;
-	bio->bi_sector = sbio->physical >> 9;
-
-	for (i = 0; i < sbio->count; ++i) {
-		struct page *page;
-		int ret;
-
-		page = alloc_page(GFP_NOFS);
-		if (!page)
-			goto nomem;
-
-		ret = bio_add_page(bio, page, PAGE_SIZE, 0);
-		if (!ret) {
-			__free_page(page);
-			goto nomem;
-		}
-	}
-
 	sbio->err = 0;
 	sdev->curr = -1;
 	atomic_inc(&sdev->in_flight);
 
-	submit_bio(READ, bio);
+	submit_bio(READ, sbio->bio);
 
 	return 0;
-
-nomem:
-	scrub_free_bio(bio);
-
-	return -ENOMEM;
 }
 
 static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
@@ -995,6 +963,8 @@ static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
 		      u8 *csum, int force)
 {
 	struct scrub_bio *sbio;
+	struct page *page;
+	int ret;
 
 again:
 	/*
@@ -1015,12 +985,22 @@ again:
 	}
 	sbio = sdev->bios[sdev->curr];
 	if (sbio->count == 0) {
+		struct bio *bio;
+
 		sbio->physical = physical;
 		sbio->logical = logical;
+		bio = bio_alloc(GFP_NOFS, SCRUB_PAGES_PER_BIO);
+		if (!bio)
+			return -ENOMEM;
+
+		bio->bi_private = sbio;
+		bio->bi_end_io = scrub_bio_end_io;
+		bio->bi_bdev = sdev->dev->bdev;
+		bio->bi_sector = sbio->physical >> 9;
+		sbio->err = 0;
+		sbio->bio = bio;
 	} else if (sbio->physical + sbio->count * PAGE_SIZE != physical ||
 		   sbio->logical + sbio->count * PAGE_SIZE != logical) {
-		int ret;
-
 		ret = scrub_submit(sdev);
 		if (ret)
 			return ret;
@@ -1030,6 +1010,20 @@ again:
 	sbio->spag[sbio->count].generation = gen;
 	sbio->spag[sbio->count].have_csum = 0;
 	sbio->spag[sbio->count].mirror_num = mirror_num;
+
+	page = alloc_page(GFP_NOFS);
+	if (!page)
+		return -ENOMEM;
+
+	ret = bio_add_page(sbio->bio, page, PAGE_SIZE, 0);
+	if (!ret) {
+		__free_page(page);
+		ret = scrub_submit(sdev);
+		if (ret)
+			return ret;
+		goto again;
+	}
+
 	if (csum) {
 		sbio->spag[sbio->count].have_csum = 1;
 		memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 57080df..17ee7fc 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -197,7 +197,7 @@ static match_table_t tokens = {
 	{Opt_subvolrootid, "subvolrootid=%d"},
 	{Opt_defrag, "autodefrag"},
 	{Opt_inode_cache, "inode_cache"},
-	{Opt_no_space_cache, "no_space_cache"},
+	{Opt_no_space_cache, "nospace_cache"},
 	{Opt_recovery, "recovery"},
 	{Opt_err, NULL},
 };
@@ -448,6 +448,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
 		token = match_token(p, tokens, args);
 		switch (token) {
 		case Opt_subvol:
+			kfree(*subvol_name);
 			*subvol_name = match_strdup(&args[0]);
 			break;
 		case Opt_subvolid:
@@ -710,7 +711,7 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	if (btrfs_test_opt(root, SPACE_CACHE))
 		seq_puts(seq, ",space_cache");
 	else
-		seq_puts(seq, ",no_space_cache");
+		seq_puts(seq, ",nospace_cache");
 	if (btrfs_test_opt(root, CLEAR_CACHE))
 		seq_puts(seq, ",clear_cache");
 	if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
@@ -824,13 +825,9 @@ static char *setup_root_args(char *args)
 static struct dentry *mount_subvol(const char *subvol_name, int flags,
 				   const char *device_name, char *data)
 {
-	struct super_block *s;
 	struct dentry *root;
 	struct vfsmount *mnt;
-	struct mnt_namespace *ns_private;
 	char *newargs;
-	struct path path;
-	int error;
 
 	newargs = setup_root_args(data);
 	if (!newargs)
@@ -841,39 +838,17 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,
 	if (IS_ERR(mnt))
 		return ERR_CAST(mnt);
 
-	ns_private = create_mnt_ns(mnt);
-	if (IS_ERR(ns_private)) {
-		mntput(mnt);
-		return ERR_CAST(ns_private);
-	}
-
-	/*
-	 * This will trigger the automount of the subvol so we can just
-	 * drop the mnt we have here and return the dentry that we
-	 * found.
-	 */
-	error = vfs_path_lookup(mnt->mnt_root, mnt, subvol_name,
-				LOOKUP_FOLLOW, &path);
-	put_mnt_ns(ns_private);
-	if (error)
-		return ERR_PTR(error);
+	root = mount_subtree(mnt, subvol_name);
 
-	if (!is_subvolume_inode(path.dentry->d_inode)) {
-		path_put(&path);
-		mntput(mnt);
-		error = -EINVAL;
+	if (!IS_ERR(root) && !is_subvolume_inode(root->d_inode)) {
+		struct super_block *s = root->d_sb;
+		dput(root);
+		root = ERR_PTR(-EINVAL);
+		deactivate_locked_super(s);
 		printk(KERN_ERR "btrfs: '%s' is not a valid subvolume\n",
 				subvol_name);
-		return ERR_PTR(-EINVAL);
 	}
 
-	/* Get a ref to the sb and the dentry we found and return it */
-	s = path.mnt->mnt_sb;
-	atomic_inc(&s->s_active);
-	root = dget(path.dentry);
-	path_put(&path);
-	down_write(&s->s_umount);
-
 	return root;
 }
 
@@ -890,7 +865,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 	struct super_block *s;
 	struct dentry *root;
 	struct btrfs_fs_devices *fs_devices = NULL;
-	struct btrfs_root *tree_root = NULL;
 	struct btrfs_fs_info *fs_info = NULL;
 	fmode_t mode = FMODE_READ;
 	char *subvol_name = NULL;
@@ -904,8 +878,10 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 	error = btrfs_parse_early_options(data, mode, fs_type,
 					  &subvol_name, &subvol_objectid,
 					  &subvol_rootid, &fs_devices);
-	if (error)
+	if (error) {
+		kfree(subvol_name);
 		return ERR_PTR(error);
+	}
 
 	if (subvol_name) {
 		root = mount_subvol(subvol_name, flags, device_name, data);
@@ -917,15 +893,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 	if (error)
 		return ERR_PTR(error);
 
-	error = btrfs_open_devices(fs_devices, mode, fs_type);
-	if (error)
-		return ERR_PTR(error);
-
-	if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) {
-		error = -EACCES;
-		goto error_close_devices;
-	}
-
 	/*
 	 * Setup a dummy root and fs_info for test/set super.  This is because
 	 * we don't actually fill this stuff out until open_ctree, but we need
@@ -933,24 +900,36 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 	 * then open_ctree will properly initialize everything later.
 	 */
 	fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS);
-	tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
-	if (!fs_info || !tree_root) {
+	if (!fs_info)
+		return ERR_PTR(-ENOMEM);
+
+	fs_info->tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+	if (!fs_info->tree_root) {
 		error = -ENOMEM;
-		goto error_close_devices;
+		goto error_fs_info;
 	}
-	fs_info->tree_root = tree_root;
+	fs_info->tree_root->fs_info = fs_info;
 	fs_info->fs_devices = fs_devices;
-	tree_root->fs_info = fs_info;
 
 	fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS);
 	fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS);
 	if (!fs_info->super_copy || !fs_info->super_for_commit) {
 		error = -ENOMEM;
+		goto error_fs_info;
+	}
+
+	error = btrfs_open_devices(fs_devices, mode, fs_type);
+	if (error)
+		goto error_fs_info;
+
+	if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) {
+		error = -EACCES;
 		goto error_close_devices;
 	}
 
 	bdev = fs_devices->latest_bdev;
-	s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root);
+	s = sget(fs_type, btrfs_test_super, btrfs_set_super,
+		 fs_info->tree_root);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto error_close_devices;
@@ -959,12 +938,12 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 	if (s->s_root) {
 		if ((flags ^ s->s_flags) & MS_RDONLY) {
 			deactivate_locked_super(s);
-			return ERR_PTR(-EBUSY);
+			error = -EBUSY;
+			goto error_close_devices;
 		}
 
 		btrfs_close_devices(fs_devices);
 		free_fs_info(fs_info);
-		kfree(tree_root);
 	} else {
 		char b[BDEVNAME_SIZE];
 
@@ -991,8 +970,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 
 error_close_devices:
 	btrfs_close_devices(fs_devices);
+error_fs_info:
 	free_fs_info(fs_info);
-	kfree(tree_root);
 	return ERR_PTR(error);
 }
 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 960835e..81376d9 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -785,6 +785,10 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
 
 			btrfs_save_ino_cache(root, trans);
 
+			/* see comments in should_cow_block() */
+			root->force_cow = 0;
+			smp_wmb();
+
 			if (root->commit_root != root->node) {
 				mutex_lock(&root->fs_commit_mutex);
 				switch_commit_root(root);
@@ -882,8 +886,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
 
 	if (to_reserve > 0) {
-		ret = btrfs_block_rsv_add(root, &pending->block_rsv,
-					  to_reserve);
+		ret = btrfs_block_rsv_add_noflush(root, &pending->block_rsv,
+						  to_reserve);
 		if (ret) {
 			pending->error = ret;
 			goto fail;
@@ -947,6 +951,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	btrfs_tree_unlock(old);
 	free_extent_buffer(old);
 
+	/* see comments in should_cow_block() */
+	root->force_cow = 1;
+	smp_wmb();
+
 	btrfs_set_root_node(new_root_item, tmp);
 	/* record when the snapshot was created in key.offset */
 	key.offset = trans->transid;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f8e29431..c37433d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -999,7 +999,7 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
 	key.objectid = device->devid;
 	key.offset = start;
 	key.type = BTRFS_DEV_EXTENT_KEY;
-
+again:
 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 	if (ret > 0) {
 		ret = btrfs_previous_item(root, path, key.objectid,
@@ -1012,6 +1012,9 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
 					struct btrfs_dev_extent);
 		BUG_ON(found_key.offset > start || found_key.offset +
 		       btrfs_dev_extent_length(leaf, extent) < start);
+		key = found_key;
+		btrfs_release_path(path);
+		goto again;
 	} else if (ret == 0) {
 		leaf = path->nodes[0];
 		extent = btrfs_item_ptr(leaf, path->slots[0],
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index ab5b1c4..78f2d4d 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -100,6 +100,12 @@ struct btrfs_device {
 	struct reada_zone *reada_curr_zone;
 	struct radix_tree_root reada_zones;
 	struct radix_tree_root reada_extents;
+
+	/* for sending down flush barriers */
+	struct bio *flush_bio;
+	struct completion flush_wait;
+	int nobarriers;
+
 };
 
 struct btrfs_fs_devices {
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 2abd0df..bca3948 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1143,7 +1143,7 @@ static void ceph_d_prune(struct dentry *dentry)
 {
 	struct ceph_dentry_info *di;
 
-	dout("d_release %p\n", dentry);
+	dout("ceph_d_prune %p\n", dentry);
 
 	/* do we have a valid parent? */
 	if (!dentry->d_parent || IS_ROOT(dentry))
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e392bfc..116f365 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1328,12 +1328,13 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
  */
 void ceph_queue_writeback(struct inode *inode)
 {
+	ihold(inode);
 	if (queue_work(ceph_inode_to_client(inode)->wb_wq,
 		       &ceph_inode(inode)->i_wb_work)) {
 		dout("ceph_queue_writeback %p\n", inode);
-		ihold(inode);
 	} else {
 		dout("ceph_queue_writeback %p failed\n", inode);
+		iput(inode);
 	}
 }
 
@@ -1353,12 +1354,13 @@ static void ceph_writeback_work(struct work_struct *work)
  */
 void ceph_queue_invalidate(struct inode *inode)
 {
+	ihold(inode);
 	if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq,
 		       &ceph_inode(inode)->i_pg_inv_work)) {
 		dout("ceph_queue_invalidate %p\n", inode);
-		ihold(inode);
 	} else {
 		dout("ceph_queue_invalidate %p failed\n", inode);
+		iput(inode);
 	}
 }
 
@@ -1434,13 +1436,14 @@ void ceph_queue_vmtruncate(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 
+	ihold(inode);
 	if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
 		       &ci->i_vmtruncate_work)) {
 		dout("ceph_queue_vmtruncate %p\n", inode);
-		ihold(inode);
 	} else {
 		dout("ceph_queue_vmtruncate %p failed, pending=%d\n",
 		     inode, ci->i_truncate_pending);
+		iput(inode);
 	}
 }
 
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index a90846f..8dc73a5 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -638,10 +638,12 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
 	if (err == 0) {
 		dout("open_root_inode success\n");
 		if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT &&
-		    fsc->sb->s_root == NULL)
+		    fsc->sb->s_root == NULL) {
 			root = d_alloc_root(req->r_target_inode);
-		else
+			ceph_init_dentry(root);
+		} else {
 			root = d_obtain_alias(req->r_target_inode);
+		}
 		req->r_target_inode = NULL;
 		dout("open_root_inode success, root dentry is %p\n", root);
 	} else {
diff --git a/fs/dcache.c b/fs/dcache.c
index a901c69..10ba92d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -36,6 +36,7 @@
 #include <linux/bit_spinlock.h>
 #include <linux/rculist_bl.h>
 #include <linux/prefetch.h>
+#include <linux/ratelimit.h>
 #include "internal.h"
 
 /*
@@ -2383,8 +2384,16 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 				actual = __d_unalias(inode, dentry, alias);
 			}
 			write_sequnlock(&rename_lock);
-			if (IS_ERR(actual))
+			if (IS_ERR(actual)) {
+				if (PTR_ERR(actual) == -ELOOP)
+					pr_warn_ratelimited(
+						"VFS: Lookup of '%s' in %s %s"
+						" would have caused loop\n",
+						dentry->d_name.name,
+						inode->i_sb->s_type->name,
+						inode->i_sb->s_id);
 				dput(alias);
+			}
 			goto out_nolock;
 		}
 	}
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 58609bd..2a83425 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -967,7 +967,7 @@ static void ecryptfs_set_default_crypt_stat_vals(
 
 /**
  * ecryptfs_new_file_context
- * @ecryptfs_dentry: The eCryptfs dentry
+ * @ecryptfs_inode: The eCryptfs inode
  *
  * If the crypto context for the file has not yet been established,
  * this is where we do that.  Establishing a new crypto context
@@ -984,13 +984,13 @@ static void ecryptfs_set_default_crypt_stat_vals(
  *
  * Returns zero on success; non-zero otherwise
  */
-int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry)
+int ecryptfs_new_file_context(struct inode *ecryptfs_inode)
 {
 	struct ecryptfs_crypt_stat *crypt_stat =
-	    &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
+	    &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
 	    &ecryptfs_superblock_to_private(
-		    ecryptfs_dentry->d_sb)->mount_crypt_stat;
+		    ecryptfs_inode->i_sb)->mount_crypt_stat;
 	int cipher_name_len;
 	int rc = 0;
 
@@ -1299,12 +1299,12 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t max,
 }
 
 static int
-ecryptfs_write_metadata_to_contents(struct dentry *ecryptfs_dentry,
+ecryptfs_write_metadata_to_contents(struct inode *ecryptfs_inode,
 				    char *virt, size_t virt_len)
 {
 	int rc;
 
-	rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, virt,
+	rc = ecryptfs_write_lower(ecryptfs_inode, virt,
 				  0, virt_len);
 	if (rc < 0)
 		printk(KERN_ERR "%s: Error attempting to write header "
@@ -1338,7 +1338,8 @@ static unsigned long ecryptfs_get_zeroed_pages(gfp_t gfp_mask,
 
 /**
  * ecryptfs_write_metadata
- * @ecryptfs_dentry: The eCryptfs dentry
+ * @ecryptfs_dentry: The eCryptfs dentry, which should be negative
+ * @ecryptfs_inode: The newly created eCryptfs inode
  *
  * Write the file headers out.  This will likely involve a userspace
  * callout, in which the session key is encrypted with one or more
@@ -1348,10 +1349,11 @@ static unsigned long ecryptfs_get_zeroed_pages(gfp_t gfp_mask,
  *
  * Returns zero on success; non-zero on error
  */
-int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
+int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry,
+			    struct inode *ecryptfs_inode)
 {
 	struct ecryptfs_crypt_stat *crypt_stat =
-		&ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
+		&ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
 	unsigned int order;
 	char *virt;
 	size_t virt_len;
@@ -1391,7 +1393,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
 		rc = ecryptfs_write_metadata_to_xattr(ecryptfs_dentry, virt,
 						      size);
 	else
-		rc = ecryptfs_write_metadata_to_contents(ecryptfs_dentry, virt,
+		rc = ecryptfs_write_metadata_to_contents(ecryptfs_inode, virt,
 							 virt_len);
 	if (rc) {
 		printk(KERN_ERR "%s: Error writing metadata out to lower file; "
@@ -1943,7 +1945,7 @@ static unsigned char *portable_filename_chars = ("-.0123456789ABCD"
 
 /* We could either offset on every reverse map or just pad some 0x00's
  * at the front here */
-static const unsigned char filename_rev_map[] = {
+static const unsigned char filename_rev_map[256] = {
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 7 */
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 15 */
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 23 */
@@ -1959,7 +1961,7 @@ static const unsigned char filename_rev_map[] = {
 	0x00, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, /* 103 */
 	0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, /* 111 */
 	0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, /* 119 */
-	0x3D, 0x3E, 0x3F
+	0x3D, 0x3E, 0x3F /* 123 - 255 initialized to 0x00 */
 };
 
 /**
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 54481a3..a9f29b1 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -584,9 +584,10 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat);
 int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode);
 int ecryptfs_encrypt_page(struct page *page);
 int ecryptfs_decrypt_page(struct page *page);
-int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry);
+int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry,
+			    struct inode *ecryptfs_inode);
 int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry);
-int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry);
+int ecryptfs_new_file_context(struct inode *ecryptfs_inode);
 void ecryptfs_write_crypt_stat_flags(char *page_virt,
 				     struct ecryptfs_crypt_stat *crypt_stat,
 				     size_t *written);
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index c6ac98c..d3f95f9 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -139,6 +139,27 @@ out:
 	return rc;
 }
 
+static void ecryptfs_vma_close(struct vm_area_struct *vma)
+{
+	filemap_write_and_wait(vma->vm_file->f_mapping);
+}
+
+static const struct vm_operations_struct ecryptfs_file_vm_ops = {
+	.close		= ecryptfs_vma_close,
+	.fault		= filemap_fault,
+};
+
+static int ecryptfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	int rc;
+
+	rc = generic_file_mmap(file, vma);
+	if (!rc)
+		vma->vm_ops = &ecryptfs_file_vm_ops;
+
+	return rc;
+}
+
 struct kmem_cache *ecryptfs_file_info_cache;
 
 /**
@@ -349,7 +370,7 @@ const struct file_operations ecryptfs_main_fops = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = ecryptfs_compat_ioctl,
 #endif
-	.mmap = generic_file_mmap,
+	.mmap = ecryptfs_file_mmap,
 	.open = ecryptfs_open,
 	.flush = ecryptfs_flush,
 	.release = ecryptfs_release,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index a36d327..32f90a3 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -172,22 +172,23 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
  * it. It will also update the eCryptfs directory inode to mimic the
  * stat of the lower directory inode.
  *
- * Returns zero on success; non-zero on error condition
+ * Returns the new eCryptfs inode on success; an ERR_PTR on error condition
  */
-static int
+static struct inode *
 ecryptfs_do_create(struct inode *directory_inode,
 		   struct dentry *ecryptfs_dentry, int mode)
 {
 	int rc;
 	struct dentry *lower_dentry;
 	struct dentry *lower_dir_dentry;
+	struct inode *inode;
 
 	lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
 	lower_dir_dentry = lock_parent(lower_dentry);
 	if (IS_ERR(lower_dir_dentry)) {
 		ecryptfs_printk(KERN_ERR, "Error locking directory of "
 				"dentry\n");
-		rc = PTR_ERR(lower_dir_dentry);
+		inode = ERR_CAST(lower_dir_dentry);
 		goto out;
 	}
 	rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode,
@@ -195,20 +196,19 @@ ecryptfs_do_create(struct inode *directory_inode,
 	if (rc) {
 		printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
 		       "rc = [%d]\n", __func__, rc);
+		inode = ERR_PTR(rc);
 		goto out_lock;
 	}
-	rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
-				directory_inode->i_sb);
-	if (rc) {
-		ecryptfs_printk(KERN_ERR, "Failure in ecryptfs_interpose\n");
+	inode = __ecryptfs_get_inode(lower_dentry->d_inode,
+				     directory_inode->i_sb);
+	if (IS_ERR(inode))
 		goto out_lock;
-	}
 	fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode);
 	fsstack_copy_inode_size(directory_inode, lower_dir_dentry->d_inode);
 out_lock:
 	unlock_dir(lower_dir_dentry);
 out:
-	return rc;
+	return inode;
 }
 
 /**
@@ -219,26 +219,26 @@ out:
  *
  * Returns zero on success
  */
-static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
+static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry,
+				    struct inode *ecryptfs_inode)
 {
 	struct ecryptfs_crypt_stat *crypt_stat =
-		&ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
+		&ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
 	int rc = 0;
 
-	if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
+	if (S_ISDIR(ecryptfs_inode->i_mode)) {
 		ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
 		crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
 		goto out;
 	}
 	ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n");
-	rc = ecryptfs_new_file_context(ecryptfs_dentry);
+	rc = ecryptfs_new_file_context(ecryptfs_inode);
 	if (rc) {
 		ecryptfs_printk(KERN_ERR, "Error creating new file "
 				"context; rc = [%d]\n", rc);
 		goto out;
 	}
-	rc = ecryptfs_get_lower_file(ecryptfs_dentry,
-				     ecryptfs_dentry->d_inode);
+	rc = ecryptfs_get_lower_file(ecryptfs_dentry, ecryptfs_inode);
 	if (rc) {
 		printk(KERN_ERR "%s: Error attempting to initialize "
 			"the lower file for the dentry with name "
@@ -246,10 +246,10 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
 			ecryptfs_dentry->d_name.name, rc);
 		goto out;
 	}
-	rc = ecryptfs_write_metadata(ecryptfs_dentry);
+	rc = ecryptfs_write_metadata(ecryptfs_dentry, ecryptfs_inode);
 	if (rc)
 		printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc);
-	ecryptfs_put_lower_file(ecryptfs_dentry->d_inode);
+	ecryptfs_put_lower_file(ecryptfs_inode);
 out:
 	return rc;
 }
@@ -269,18 +269,28 @@ static int
 ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
 		int mode, struct nameidata *nd)
 {
+	struct inode *ecryptfs_inode;
 	int rc;
 
-	/* ecryptfs_do_create() calls ecryptfs_interpose() */
-	rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode);
-	if (unlikely(rc)) {
+	ecryptfs_inode = ecryptfs_do_create(directory_inode, ecryptfs_dentry,
+					    mode);
+	if (unlikely(IS_ERR(ecryptfs_inode))) {
 		ecryptfs_printk(KERN_WARNING, "Failed to create file in"
 				"lower filesystem\n");
+		rc = PTR_ERR(ecryptfs_inode);
 		goto out;
 	}
 	/* At this point, a file exists on "disk"; we need to make sure
 	 * that this on disk file is prepared to be an ecryptfs file */
-	rc = ecryptfs_initialize_file(ecryptfs_dentry);
+	rc = ecryptfs_initialize_file(ecryptfs_dentry, ecryptfs_inode);
+	if (rc) {
+		drop_nlink(ecryptfs_inode);
+		unlock_new_inode(ecryptfs_inode);
+		iput(ecryptfs_inode);
+		goto out;
+	}
+	d_instantiate(ecryptfs_dentry, ecryptfs_inode);
+	unlock_new_inode(ecryptfs_inode);
 out:
 	return rc;
 }
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index f6dba45..12ccacd 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -565,7 +565,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
 	brelse(bitmap_bh);
 	printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu"
 	       ", computed = %llu, %llu\n",
-	       EXT4_B2C(sbi, ext4_free_blocks_count(es)),
+	       EXT4_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)),
 	       desc_count, bitmap_count);
 	return bitmap_count;
 #else
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 240f6e2..fffec40 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2270,6 +2270,7 @@ retry:
 			ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
 			       "%ld pages, ino %lu; err %d", __func__,
 				wbc->nr_to_write, inode->i_ino, ret);
+			blk_finish_plug(&plug);
 			goto out_writepages;
 		}
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9953d80..3858767 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1683,7 +1683,9 @@ static int parse_options(char *options, struct super_block *sb,
 			data_opt = EXT4_MOUNT_WRITEBACK_DATA;
 		datacheck:
 			if (is_remount) {
-				if (test_opt(sb, DATA_FLAGS) != data_opt) {
+				if (!sbi->s_journal)
+					ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
+				else if (test_opt(sb, DATA_FLAGS) != data_opt) {
 					ext4_msg(sb, KERN_ERR,
 						"Cannot change data mode on remount");
 					return 0;
@@ -3099,8 +3101,6 @@ static void ext4_destroy_lazyinit_thread(void)
 }
 
 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
-				__releases(kernel_lock)
-				__acquires(kernel_lock)
 {
 	char *orig_data = kstrdup(data, GFP_KERNEL);
 	struct buffer_head *bh;
diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c
index e673a88..b1ce4c7 100644
--- a/fs/hfs/trans.c
+++ b/fs/hfs/trans.c
@@ -40,6 +40,8 @@ int hfs_mac2asc(struct super_block *sb, char *out, const struct hfs_name *in)
 
 	src = in->name;
 	srclen = in->len;
+	if (srclen > HFS_NAMELEN)
+		srclen = HFS_NAMELEN;
 	dst = out;
 	dstlen = HFS_MAX_NAMELEN;
 	if (nls_io) {
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 3f32bcb..ef175cb 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -16,38 +16,26 @@
 #include <linux/bitops.h>
 #include <linux/sched.h>
 
-static const int nibblemap[] = { 4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0 };
-
 static DEFINE_SPINLOCK(bitmap_lock);
 
-static unsigned long count_free(struct buffer_head *map[], unsigned numblocks, __u32 numbits)
+/*
+ * bitmap consists of blocks filled with 16bit words
+ * bit set == busy, bit clear == free
+ * endianness is a mess, but for counting zero bits it really doesn't matter...
+ */
+static __u32 count_free(struct buffer_head *map[], unsigned blocksize, __u32 numbits)
 {
-	unsigned i, j, sum = 0;
-	struct buffer_head *bh;
-  
-	for (i=0; i<numblocks-1; i++) {
-		if (!(bh=map[i])) 
-			return(0);
-		for (j=0; j<bh->b_size; j++)
-			sum += nibblemap[bh->b_data[j] & 0xf]
-				+ nibblemap[(bh->b_data[j]>>4) & 0xf];
-	}
+	__u32 sum = 0;
+	unsigned blocks = DIV_ROUND_UP(numbits, blocksize * 8);
 
-	if (numblocks==0 || !(bh=map[numblocks-1]))
-		return(0);
-	i = ((numbits - (numblocks-1) * bh->b_size * 8) / 16) * 2;
-	for (j=0; j<i; j++) {
-		sum += nibblemap[bh->b_data[j] & 0xf]
-			+ nibblemap[(bh->b_data[j]>>4) & 0xf];
+	while (blocks--) {
+		unsigned words = blocksize / 2;
+		__u16 *p = (__u16 *)(*map++)->b_data;
+		while (words--)
+			sum += 16 - hweight16(*p++);
 	}
 
-	i = numbits%16;
-	if (i!=0) {
-		i = *(__u16 *)(&bh->b_data[j]) | ~((1<<i) - 1);
-		sum += nibblemap[i & 0xf] + nibblemap[(i>>4) & 0xf];
-		sum += nibblemap[(i>>8) & 0xf] + nibblemap[(i>>12) & 0xf];
-	}
-	return(sum);
+	return sum;
 }
 
 void minix_free_block(struct inode *inode, unsigned long block)
@@ -105,10 +93,12 @@ int minix_new_block(struct inode * inode)
 	return 0;
 }
 
-unsigned long minix_count_free_blocks(struct minix_sb_info *sbi)
+unsigned long minix_count_free_blocks(struct super_block *sb)
 {
-	return (count_free(sbi->s_zmap, sbi->s_zmap_blocks,
-		sbi->s_nzones - sbi->s_firstdatazone + 1)
+	struct minix_sb_info *sbi = minix_sb(sb);
+	u32 bits = sbi->s_nzones - (sbi->s_firstdatazone + 1);
+
+	return (count_free(sbi->s_zmap, sb->s_blocksize, bits)
 		<< sbi->s_log_zone_size);
 }
 
@@ -273,7 +263,10 @@ struct inode *minix_new_inode(const struct inode *dir, int mode, int *error)
 	return inode;
 }
 
-unsigned long minix_count_free_inodes(struct minix_sb_info *sbi)
+unsigned long minix_count_free_inodes(struct super_block *sb)
 {
-	return count_free(sbi->s_imap, sbi->s_imap_blocks, sbi->s_ninodes + 1);
+	struct minix_sb_info *sbi = minix_sb(sb);
+	u32 bits = sbi->s_ninodes + 1;
+
+	return count_free(sbi->s_imap, sb->s_blocksize, bits);
 }
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 64cdcd6..1d9e339 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -279,6 +279,27 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
  	else if (sbi->s_mount_state & MINIX_ERROR_FS)
 		printk("MINIX-fs: mounting file system with errors, "
 			"running fsck is recommended\n");
+
+	/* Apparently minix can create filesystems that allocate more blocks for
+	 * the bitmaps than needed.  We simply ignore that, but verify it didn't
+	 * create one with not enough blocks and bail out if so.
+	 */
+	block = minix_blocks_needed(sbi->s_ninodes, s->s_blocksize);
+	if (sbi->s_imap_blocks < block) {
+		printk("MINIX-fs: file system does not have enough "
+				"imap blocks allocated.  Refusing to mount\n");
+		goto out_iput;
+	}
+
+	block = minix_blocks_needed(
+			(sbi->s_nzones - (sbi->s_firstdatazone + 1)),
+			s->s_blocksize);
+	if (sbi->s_zmap_blocks < block) {
+		printk("MINIX-fs: file system does not have enough "
+				"zmap blocks allocated.  Refusing to mount.\n");
+		goto out_iput;
+	}
+
 	return 0;
 
 out_iput:
@@ -339,10 +360,10 @@ static int minix_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_type = sb->s_magic;
 	buf->f_bsize = sb->s_blocksize;
 	buf->f_blocks = (sbi->s_nzones - sbi->s_firstdatazone) << sbi->s_log_zone_size;
-	buf->f_bfree = minix_count_free_blocks(sbi);
+	buf->f_bfree = minix_count_free_blocks(sb);
 	buf->f_bavail = buf->f_bfree;
 	buf->f_files = sbi->s_ninodes;
-	buf->f_ffree = minix_count_free_inodes(sbi);
+	buf->f_ffree = minix_count_free_inodes(sb);
 	buf->f_namelen = sbi->s_namelen;
 	buf->f_fsid.val[0] = (u32)id;
 	buf->f_fsid.val[1] = (u32)(id >> 32);
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 341e212..26bbd55 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -48,10 +48,10 @@ extern struct minix_inode * minix_V1_raw_inode(struct super_block *, ino_t, stru
 extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **);
 extern struct inode * minix_new_inode(const struct inode *, int, int *);
 extern void minix_free_inode(struct inode * inode);
-extern unsigned long minix_count_free_inodes(struct minix_sb_info *sbi);
+extern unsigned long minix_count_free_inodes(struct super_block *sb);
 extern int minix_new_block(struct inode * inode);
 extern void minix_free_block(struct inode *inode, unsigned long block);
-extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi);
+extern unsigned long minix_count_free_blocks(struct super_block *sb);
 extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len);
 
@@ -88,6 +88,11 @@ static inline struct minix_inode_info *minix_i(struct inode *inode)
 	return list_entry(inode, struct minix_inode_info, vfs_inode);
 }
 
+static inline unsigned minix_blocks_needed(unsigned bits, unsigned blocksize)
+{
+	return DIV_ROUND_UP(bits, blocksize * 8);
+}
+
 #if defined(CONFIG_MINIX_FS_NATIVE_ENDIAN) && \
 	defined(CONFIG_MINIX_FS_BIG_ENDIAN_16BIT_INDEXED)
 
@@ -125,7 +130,7 @@ static inline int minix_find_first_zero_bit(const void *vaddr, unsigned size)
 	if (!size)
 		return 0;
 
-	size = (size >> 4) + ((size & 15) > 0);
+	size >>= 4;
 	while (*p++ == 0xffff) {
 		if (--size == 0)
 			return (p - addr) << 4;
diff --git a/fs/namespace.c b/fs/namespace.c
index e5e1c7d..6d3a196 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2483,11 +2483,43 @@ struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
 		__mnt_make_longterm(mnt);
 		new_ns->root = mnt;
 		list_add(&new_ns->list, &new_ns->root->mnt_list);
+	} else {
+		mntput(mnt);
 	}
 	return new_ns;
 }
 EXPORT_SYMBOL(create_mnt_ns);
 
+struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
+{
+	struct mnt_namespace *ns;
+	struct super_block *s;
+	struct path path;
+	int err;
+
+	ns = create_mnt_ns(mnt);
+	if (IS_ERR(ns))
+		return ERR_CAST(ns);
+
+	err = vfs_path_lookup(mnt->mnt_root, mnt,
+			name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
+
+	put_mnt_ns(ns);
+
+	if (err)
+		return ERR_PTR(err);
+
+	/* trade a vfsmount reference for active sb one */
+	s = path.mnt->mnt_sb;
+	atomic_inc(&s->s_active);
+	mntput(path.mnt);
+	/* lock the sucker */
+	down_write(&s->s_umount);
+	/* ... and return the root of (sub)tree on it */
+	return path.dentry;
+}
+EXPORT_SYMBOL(mount_subtree);
+
 SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 		char __user *, type, unsigned long, flags, void __user *, data)
 {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b238d95..ac28990 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1468,12 +1468,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 				res = NULL;
 				goto out;
 			/* This turned out not to be a regular file */
+			case -EISDIR:
 			case -ENOTDIR:
 				goto no_open;
 			case -ELOOP:
 				if (!(nd->intent.open.flags & O_NOFOLLOW))
 					goto no_open;
-			/* case -EISDIR: */
 			/* case -EINVAL: */
 			default:
 				res = ERR_CAST(inode);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 0a1f831..eca56d4 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -40,48 +40,8 @@
 
 #define NFSDBG_FACILITY		NFSDBG_FILE
 
-static int nfs_file_open(struct inode *, struct file *);
-static int nfs_file_release(struct inode *, struct file *);
-static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
-static int  nfs_file_mmap(struct file *, struct vm_area_struct *);
-static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos,
-					struct pipe_inode_info *pipe,
-					size_t count, unsigned int flags);
-static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos);
-static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
-					struct file *filp, loff_t *ppos,
-					size_t count, unsigned int flags);
-static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos);
-static int  nfs_file_flush(struct file *, fl_owner_t id);
-static int  nfs_file_fsync(struct file *, loff_t, loff_t, int datasync);
-static int nfs_check_flags(int flags);
-static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
-static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
-static int nfs_setlease(struct file *file, long arg, struct file_lock **fl);
-
 static const struct vm_operations_struct nfs_file_vm_ops;
 
-const struct file_operations nfs_file_operations = {
-	.llseek		= nfs_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= nfs_file_read,
-	.aio_write	= nfs_file_write,
-	.mmap		= nfs_file_mmap,
-	.open		= nfs_file_open,
-	.flush		= nfs_file_flush,
-	.release	= nfs_file_release,
-	.fsync		= nfs_file_fsync,
-	.lock		= nfs_lock,
-	.flock		= nfs_flock,
-	.splice_read	= nfs_file_splice_read,
-	.splice_write	= nfs_file_splice_write,
-	.check_flags	= nfs_check_flags,
-	.setlease	= nfs_setlease,
-};
-
 const struct inode_operations nfs_file_inode_operations = {
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
@@ -886,3 +846,54 @@ static int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
 			file->f_path.dentry->d_name.name, arg);
 	return -EINVAL;
 }
+
+const struct file_operations nfs_file_operations = {
+	.llseek		= nfs_file_llseek,
+	.read		= do_sync_read,
+	.write		= do_sync_write,
+	.aio_read	= nfs_file_read,
+	.aio_write	= nfs_file_write,
+	.mmap		= nfs_file_mmap,
+	.open		= nfs_file_open,
+	.flush		= nfs_file_flush,
+	.release	= nfs_file_release,
+	.fsync		= nfs_file_fsync,
+	.lock		= nfs_lock,
+	.flock		= nfs_flock,
+	.splice_read	= nfs_file_splice_read,
+	.splice_write	= nfs_file_splice_write,
+	.check_flags	= nfs_check_flags,
+	.setlease	= nfs_setlease,
+};
+
+#ifdef CONFIG_NFS_V4
+static int
+nfs4_file_open(struct inode *inode, struct file *filp)
+{
+	/*
+	 * NFSv4 opens are handled in d_lookup and d_revalidate. If we get to
+	 * this point, then something is very wrong
+	 */
+	dprintk("NFS: %s called! inode=%p filp=%p\n", __func__, inode, filp);
+	return -ENOTDIR;
+}
+
+const struct file_operations nfs4_file_operations = {
+	.llseek		= nfs_file_llseek,
+	.read		= do_sync_read,
+	.write		= do_sync_write,
+	.aio_read	= nfs_file_read,
+	.aio_write	= nfs_file_write,
+	.mmap		= nfs_file_mmap,
+	.open		= nfs4_file_open,
+	.flush		= nfs_file_flush,
+	.release	= nfs_file_release,
+	.fsync		= nfs_file_fsync,
+	.lock		= nfs_lock,
+	.flock		= nfs_flock,
+	.splice_read	= nfs_file_splice_read,
+	.splice_write	= nfs_file_splice_write,
+	.check_flags	= nfs_check_flags,
+	.setlease	= nfs_setlease,
+};
+#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c07a55a..50a15fa 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -291,7 +291,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		 */
 		inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
 		if (S_ISREG(inode->i_mode)) {
-			inode->i_fop = &nfs_file_operations;
+			inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
 			inode->i_data.a_ops = &nfs_file_aops;
 			inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
 		} else if (S_ISDIR(inode->i_mode)) {
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index c1a1bd8..3f4d957 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -299,6 +299,8 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
 extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
 		struct list_head *head);
 
+extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
+		struct inode *inode);
 extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_readdata_release(struct nfs_read_data *rdata);
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 85f1690..d4bc9ed9 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -853,6 +853,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs3_dir_inode_operations,
 	.file_inode_ops	= &nfs3_file_inode_operations,
+	.file_ops	= &nfs_file_operations,
 	.getroot	= nfs3_proc_get_root,
 	.getattr	= nfs3_proc_getattr,
 	.setattr	= nfs3_proc_setattr,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b60fddf..be2bbac 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2464,8 +2464,7 @@ static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qst
 		case -NFS4ERR_BADNAME:
 			return -ENOENT;
 		case -NFS4ERR_MOVED:
-			err = nfs4_get_referral(dir, name, fattr, fhandle);
-			break;
+			return nfs4_get_referral(dir, name, fattr, fhandle);
 		case -NFS4ERR_WRONGSEC:
 			nfs_fixup_secinfo_attributes(fattr, fhandle);
 		}
@@ -6253,6 +6252,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.dentry_ops	= &nfs4_dentry_operations,
 	.dir_inode_ops	= &nfs4_dir_inode_operations,
 	.file_inode_ops	= &nfs4_file_inode_operations,
+	.file_ops	= &nfs4_file_operations,
 	.getroot	= nfs4_proc_get_root,
 	.getattr	= nfs4_proc_getattr,
 	.setattr	= nfs4_proc_setattr,
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index baf7353..8e672a2 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1260,6 +1260,25 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
 
+static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
+{
+	struct nfs_pageio_descriptor pgio;
+
+	put_lseg(data->lseg);
+	data->lseg = NULL;
+	dprintk("pnfs write error = %d\n", data->pnfs_error);
+
+	nfs_pageio_init_read_mds(&pgio, data->inode);
+
+	while (!list_empty(&data->pages)) {
+		struct nfs_page *req = nfs_list_entry(data->pages.next);
+
+		nfs_list_remove_request(req);
+		nfs_pageio_add_request(&pgio, req);
+	}
+	nfs_pageio_complete(&pgio);
+}
+
 /*
  * Called by non rpc-based layout drivers
  */
@@ -1268,11 +1287,8 @@ void pnfs_ld_read_done(struct nfs_read_data *data)
 	if (likely(!data->pnfs_error)) {
 		__nfs4_read_done_cb(data);
 		data->mds_ops->rpc_call_done(&data->task, data);
-	} else {
-		put_lseg(data->lseg);
-		data->lseg = NULL;
-		dprintk("pnfs write error = %d\n", data->pnfs_error);
-	}
+	} else
+		pnfs_ld_handle_read_error(data);
 	data->mds_ops->rpc_release(data);
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index ac40b85..f48125d 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -710,6 +710,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs_dir_inode_operations,
 	.file_inode_ops	= &nfs_file_inode_operations,
+	.file_ops	= &nfs_file_operations,
 	.getroot	= nfs_proc_get_root,
 	.getattr	= nfs_proc_getattr,
 	.setattr	= nfs_proc_setattr,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 8b48ec6..cfa175c 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -109,7 +109,7 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
 	}
 }
 
-static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
+void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
 		struct inode *inode)
 {
 	nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
@@ -534,23 +534,13 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
 static void nfs_readpage_release_full(void *calldata)
 {
 	struct nfs_read_data *data = calldata;
-	struct nfs_pageio_descriptor pgio;
 
-	if (data->pnfs_error) {
-		nfs_pageio_init_read_mds(&pgio, data->inode);
-		pgio.pg_recoalesce = 1;
-	}
 	while (!list_empty(&data->pages)) {
 		struct nfs_page *req = nfs_list_entry(data->pages.next);
 
 		nfs_list_remove_request(req);
-		if (!data->pnfs_error)
-			nfs_readpage_release(req);
-		else
-			nfs_pageio_add_request(&pgio, req);
+		nfs_readpage_release(req);
 	}
-	if (data->pnfs_error)
-		nfs_pageio_complete(&pgio);
 	nfs_readdata_release(calldata);
 }
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 480b3b6..1347774 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2787,43 +2787,18 @@ static void nfs_referral_loop_unprotect(void)
 static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
 		const char *export_path)
 {
-	struct mnt_namespace *ns_private;
-	struct super_block *s;
 	struct dentry *dentry;
-	struct path path;
-	int ret;
-
-	ns_private = create_mnt_ns(root_mnt);
-	ret = PTR_ERR(ns_private);
-	if (IS_ERR(ns_private))
-		goto out_mntput;
-
-	ret = nfs_referral_loop_protect();
-	if (ret != 0)
-		goto out_put_mnt_ns;
+	int ret = nfs_referral_loop_protect();
 
-	ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
-			export_path, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
+	if (ret) {
+		mntput(root_mnt);
+		return ERR_PTR(ret);
+	}
 
+	dentry = mount_subtree(root_mnt, export_path);
 	nfs_referral_loop_unprotect();
-	put_mnt_ns(ns_private);
-
-	if (ret != 0)
-		goto out_err;
-
-	s = path.mnt->mnt_sb;
-	atomic_inc(&s->s_active);
-	dentry = dget(path.dentry);
 
-	path_put(&path);
-	down_write(&s->s_umount);
 	return dentry;
-out_put_mnt_ns:
-	put_mnt_ns(ns_private);
-out_mntput:
-	mntput(root_mnt);
-out_err:
-	return ERR_PTR(ret);
 }
 
 static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 2db1bd3..851ba3d 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1652,46 +1652,12 @@ out:
 	return error;
 }
 
-static int proc_pid_fd_link_getattr(struct vfsmount *mnt, struct dentry *dentry,
-		struct kstat *stat)
-{
-	struct inode *inode = dentry->d_inode;
-	struct task_struct *task = get_proc_task(inode);
-	int rc;
-
-	if (task == NULL)
-		return -ESRCH;
-
-	rc = -EACCES;
-	if (lock_trace(task))
-		goto out_task;
-
-	generic_fillattr(inode, stat);
-	unlock_trace(task);
-	rc = 0;
-out_task:
-	put_task_struct(task);
-	return rc;
-}
-
 static const struct inode_operations proc_pid_link_inode_operations = {
 	.readlink	= proc_pid_readlink,
 	.follow_link	= proc_pid_follow_link,
 	.setattr	= proc_setattr,
 };
 
-static const struct inode_operations proc_fdinfo_link_inode_operations = {
-	.setattr	= proc_setattr,
-	.getattr	= proc_pid_fd_link_getattr,
-};
-
-static const struct inode_operations proc_fd_link_inode_operations = {
-	.readlink	= proc_pid_readlink,
-	.follow_link	= proc_pid_follow_link,
-	.setattr	= proc_setattr,
-	.getattr	= proc_pid_fd_link_getattr,
-};
-
 
 /* building an inode */
 
@@ -1923,61 +1889,49 @@ out:
 
 static int proc_fd_info(struct inode *inode, struct path *path, char *info)
 {
-	struct task_struct *task;
-	struct files_struct *files;
+	struct task_struct *task = get_proc_task(inode);
+	struct files_struct *files = NULL;
 	struct file *file;
 	int fd = proc_fd(inode);
-	int rc;
-
-	task = get_proc_task(inode);
-	if (!task)
-		return -ENOENT;
-
-	rc = -EACCES;
-	if (lock_trace(task))
-		goto out_task;
-
-	rc = -ENOENT;
-	files = get_files_struct(task);
-	if (files == NULL)
-		goto out_unlock;
 
-	/*
-	 * We are not taking a ref to the file structure, so we must
-	 * hold ->file_lock.
-	 */
-	spin_lock(&files->file_lock);
-	file = fcheck_files(files, fd);
-	if (file) {
-		unsigned int f_flags;
-		struct fdtable *fdt;
-
-		fdt = files_fdtable(files);
-		f_flags = file->f_flags & ~O_CLOEXEC;
-		if (FD_ISSET(fd, fdt->close_on_exec))
-			f_flags |= O_CLOEXEC;
-
-		if (path) {
-			*path = file->f_path;
-			path_get(&file->f_path);
+	if (task) {
+		files = get_files_struct(task);
+		put_task_struct(task);
+	}
+	if (files) {
+		/*
+		 * We are not taking a ref to the file structure, so we must
+		 * hold ->file_lock.
+		 */
+		spin_lock(&files->file_lock);
+		file = fcheck_files(files, fd);
+		if (file) {
+			unsigned int f_flags;
+			struct fdtable *fdt;
+
+			fdt = files_fdtable(files);
+			f_flags = file->f_flags & ~O_CLOEXEC;
+			if (FD_ISSET(fd, fdt->close_on_exec))
+				f_flags |= O_CLOEXEC;
+
+			if (path) {
+				*path = file->f_path;
+				path_get(&file->f_path);
+			}
+			if (info)
+				snprintf(info, PROC_FDINFO_MAX,
+					 "pos:\t%lli\n"
+					 "flags:\t0%o\n",
+					 (long long) file->f_pos,
+					 f_flags);
+			spin_unlock(&files->file_lock);
+			put_files_struct(files);
+			return 0;
 		}
-		if (info)
-			snprintf(info, PROC_FDINFO_MAX,
-				 "pos:\t%lli\n"
-				 "flags:\t0%o\n",
-				 (long long) file->f_pos,
-				 f_flags);
-		rc = 0;
-	} else
-		rc = -ENOENT;
-	spin_unlock(&files->file_lock);
-	put_files_struct(files);
-
-out_unlock:
-	unlock_trace(task);
-out_task:
-	put_task_struct(task);
-	return rc;
+		spin_unlock(&files->file_lock);
+		put_files_struct(files);
+	}
+	return -ENOENT;
 }
 
 static int proc_fd_link(struct inode *inode, struct path *path)
@@ -2072,7 +2026,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
 	spin_unlock(&files->file_lock);
 	put_files_struct(files);
 
-	inode->i_op = &proc_fd_link_inode_operations;
+	inode->i_op = &proc_pid_link_inode_operations;
 	inode->i_size = 64;
 	ei->op.proc_get_link = proc_fd_link;
 	d_set_d_op(dentry, &tid_fd_dentry_operations);
@@ -2104,12 +2058,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir,
 	if (fd == ~0U)
 		goto out;
 
-	result = ERR_PTR(-EACCES);
-	if (lock_trace(task))
-		goto out;
-
 	result = instantiate(dir, dentry, task, &fd);
-	unlock_trace(task);
 out:
 	put_task_struct(task);
 out_no_task:
@@ -2129,28 +2078,23 @@ static int proc_readfd_common(struct file * filp, void * dirent,
 	retval = -ENOENT;
 	if (!p)
 		goto out_no_task;
-
-	retval = -EACCES;
-	if (lock_trace(p))
-		goto out;
-
 	retval = 0;
 
 	fd = filp->f_pos;
 	switch (fd) {
 		case 0:
 			if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
-				goto out_unlock;
+				goto out;
 			filp->f_pos++;
 		case 1:
 			ino = parent_ino(dentry);
 			if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
-				goto out_unlock;
+				goto out;
 			filp->f_pos++;
 		default:
 			files = get_files_struct(p);
 			if (!files)
-				goto out_unlock;
+				goto out;
 			rcu_read_lock();
 			for (fd = filp->f_pos-2;
 			     fd < files_fdtable(files)->max_fds;
@@ -2174,9 +2118,6 @@ static int proc_readfd_common(struct file * filp, void * dirent,
 			rcu_read_unlock();
 			put_files_struct(files);
 	}
-
-out_unlock:
-	unlock_trace(p);
 out:
 	put_task_struct(p);
 out_no_task:
@@ -2254,7 +2195,6 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
 	ei->fd = fd;
 	inode->i_mode = S_IFREG | S_IRUSR;
 	inode->i_fop = &proc_fdinfo_file_operations;
-	inode->i_op = &proc_fdinfo_link_inode_operations;
 	d_set_d_op(dentry, &tid_fd_dentry_operations);
 	d_add(dentry, inode);
 	/* Close the race of the process dying before we return the dentry */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 33b1331..574d4ee 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -189,7 +189,7 @@ xfs_end_io(
 	int		error = 0;
 
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-		error = -EIO;
+		ioend->io_error = -EIO;
 		goto done;
 	}
 	if (ioend->io_error)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 1a35138..eac97ef 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -656,7 +656,7 @@ xfs_buf_item_committing(
 /*
  * This is the ops vector shared by all buf log items.
  */
-static struct xfs_item_ops xfs_buf_item_ops = {
+static const struct xfs_item_ops xfs_buf_item_ops = {
 	.iop_size	= xfs_buf_item_size,
 	.iop_format	= xfs_buf_item_format,
 	.iop_pin	= xfs_buf_item_pin,
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index bb3f71d..0dee0b7 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -295,7 +295,7 @@ xfs_qm_dquot_logitem_committing(
 /*
  * This is the ops vector for dquots
  */
-static struct xfs_item_ops xfs_dquot_item_ops = {
+static const struct xfs_item_ops xfs_dquot_item_ops = {
 	.iop_size	= xfs_qm_dquot_logitem_size,
 	.iop_format	= xfs_qm_dquot_logitem_format,
 	.iop_pin	= xfs_qm_dquot_logitem_pin,
@@ -483,7 +483,7 @@ xfs_qm_qoff_logitem_committing(
 {
 }
 
-static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
+static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
 	.iop_size	= xfs_qm_qoff_logitem_size,
 	.iop_format	= xfs_qm_qoff_logitem_format,
 	.iop_pin	= xfs_qm_qoff_logitem_pin,
@@ -498,7 +498,7 @@ static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
 /*
  * This is the ops vector shared by all quotaoff-start log items.
  */
-static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
+static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
 	.iop_size	= xfs_qm_qoff_logitem_size,
 	.iop_format	= xfs_qm_qoff_logitem_format,
 	.iop_pin	= xfs_qm_qoff_logitem_pin,
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index d22e626..35c2aff 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -217,7 +217,7 @@ xfs_efi_item_committing(
 /*
  * This is the ops vector shared by all efi log items.
  */
-static struct xfs_item_ops xfs_efi_item_ops = {
+static const struct xfs_item_ops xfs_efi_item_ops = {
 	.iop_size	= xfs_efi_item_size,
 	.iop_format	= xfs_efi_item_format,
 	.iop_pin	= xfs_efi_item_pin,
@@ -477,7 +477,7 @@ xfs_efd_item_committing(
 /*
  * This is the ops vector shared by all efd log items.
  */
-static struct xfs_item_ops xfs_efd_item_ops = {
+static const struct xfs_item_ops xfs_efd_item_ops = {
 	.iop_size	= xfs_efd_item_size,
 	.iop_format	= xfs_efd_item_format,
 	.iop_pin	= xfs_efd_item_pin,
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index b7cf21b..abaafdb 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -795,7 +795,7 @@ xfs_inode_item_committing(
 /*
  * This is the ops vector shared by all buf log items.
  */
-static struct xfs_item_ops xfs_inode_item_ops = {
+static const struct xfs_item_ops xfs_inode_item_ops = {
 	.iop_size	= xfs_inode_item_size,
 	.iop_format	= xfs_inode_item_format,
 	.iop_pin	= xfs_inode_item_pin,
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 2758a62..a14cd89 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -626,7 +626,7 @@ xfs_log_item_init(
 	struct xfs_mount	*mp,
 	struct xfs_log_item	*item,
 	int			type,
-	struct xfs_item_ops	*ops)
+	const struct xfs_item_ops *ops)
 {
 	item->li_mountp = mp;
 	item->li_ailp = mp->m_ail;
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 78c9039..3f7bf45 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -137,7 +137,7 @@ struct xfs_trans;
 void	xfs_log_item_init(struct xfs_mount	*mp,
 			struct xfs_log_item	*item,
 			int			type,
-			struct xfs_item_ops	*ops);
+			const struct xfs_item_ops *ops);
 
 xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
 		       struct xlog_ticket *ticket,
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 5cff443..0bbb1a4 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -674,7 +674,8 @@ xfs_qm_dqattach_one(
 	 * disk and we didn't ask it to allocate;
 	 * ESRCH if quotas got turned off suddenly.
 	 */
-	error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp);
+	error = xfs_qm_dqget(ip->i_mount, ip, id, type,
+			     doalloc | XFS_QMOPT_DOWARN, &dqp);
 	if (error)
 		return error;
 
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 603f3eb..3ae713c 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -326,7 +326,7 @@ typedef struct xfs_log_item {
 						 struct xfs_log_item *);
 							/* buffer item iodone */
 							/* callback func */
-	struct xfs_item_ops		*li_ops;	/* function list */
+	const struct xfs_item_ops	*li_ops;	/* function list */
 
 	/* delayed logging */
 	struct list_head		li_cil;		/* CIL pointers */
@@ -341,7 +341,7 @@ typedef struct xfs_log_item {
 	{ XFS_LI_IN_AIL,	"IN_AIL" }, \
 	{ XFS_LI_ABORTED,	"ABORTED" }
 
-typedef struct xfs_item_ops {
+struct xfs_item_ops {
 	uint (*iop_size)(xfs_log_item_t *);
 	void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
 	void (*iop_pin)(xfs_log_item_t *);
@@ -352,7 +352,7 @@ typedef struct xfs_item_ops {
 	void (*iop_push)(xfs_log_item_t *);
 	bool (*iop_pushbuf)(xfs_log_item_t *);
 	void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
-} xfs_item_ops_t;
+};
 
 #define IOP_SIZE(ip)		(*(ip)->li_ops->iop_size)(ip)
 #define IOP_FORMAT(ip,vp)	(*(ip)->li_ops->iop_format)(ip, vp)
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 4ecf2a5..ce9268a 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -112,7 +112,7 @@ xfs_readlink(
 	char		*link)
 {
 	xfs_mount_t	*mp = ip->i_mount;
-	int		pathlen;
+	xfs_fsize_t	pathlen;
 	int		error = 0;
 
 	trace_xfs_readlink(ip);
@@ -122,13 +122,19 @@ xfs_readlink(
 
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
 
-	ASSERT(S_ISLNK(ip->i_d.di_mode));
-	ASSERT(ip->i_d.di_size <= MAXPATHLEN);
-
 	pathlen = ip->i_d.di_size;
 	if (!pathlen)
 		goto out;
 
+	if (pathlen < 0 || pathlen > MAXPATHLEN) {
+		xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)",
+			 __func__, (unsigned long long) ip->i_ino,
+			 (long long) pathlen);
+		ASSERT(0);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+
+
 	if (ip->i_df.if_flags & XFS_IFINLINE) {
 		memcpy(link, ip->i_df.if_u1.if_data, pathlen);
 		link[pathlen] = '\0';