60 files changed, 758 insertions, 1063 deletions
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index cc1eadc..8358f1d 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -70,7 +70,6 @@ xfs_allocbt_alloc_block(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_ptr	*start,
 	union xfs_btree_ptr	*new,
-	int			length,
 	int			*stat)
 {
 	int			error;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 75df77d..0479c32 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1344,6 +1344,14 @@ __xfs_get_blocks(
 	/*
 	 * If this is O_DIRECT or the mpage code calling tell them how large
 	 * the mapping is, so that we can avoid repeated get_blocks calls.
+	 *
+	 * If the mapping spans EOF, then we have to break the mapping up as the
+	 * mapping for blocks beyond EOF must be marked new so that sub block
+	 * regions can be correctly zeroed. We can't do this for mappings within
+	 * EOF unless the mapping was just allocated or is unwritten, otherwise
+	 * the callers would overwrite existing data with zeros. Hence we have
+	 * to split the mapping into a range up to and including EOF, and a
+	 * second mapping for beyond EOF.
 	 */
 	if (direct || size > (1 << inode->i_blkbits)) {
 		xfs_off_t		mapping_size;
@@ -1354,6 +1362,12 @@ __xfs_get_blocks(
 		ASSERT(mapping_size > 0);
 		if (mapping_size > size)
 			mapping_size = size;
+		if (offset < i_size_read(inode) &&
+		    offset + mapping_size >= i_size_read(inode)) {
+			/* limit mapping to block that spans EOF */
+			mapping_size = roundup_64(i_size_read(inode) - offset,
+						  1 << inode->i_blkbits);
+		}
 		if (mapping_size > LONG_MAX)
 			mapping_size = LONG_MAX;
 
@@ -1566,6 +1580,16 @@ xfs_vm_write_failed(
 
 		xfs_vm_kill_delalloc_range(inode, block_offset,
 					   block_offset + bh->b_size);
+
+		/*
+		 * This buffer does not contain data anymore. make sure anyone
+		 * who finds it knows that for certain.
+		 */
+		clear_buffer_delay(bh);
+		clear_buffer_uptodate(bh);
+		clear_buffer_mapped(bh);
+		clear_buffer_new(bh);
+		clear_buffer_dirty(bh);
 	}
 
 }
@@ -1599,12 +1623,21 @@ xfs_vm_write_begin(
 	status = __block_write_begin(page, pos, len, xfs_get_blocks);
 	if (unlikely(status)) {
 		struct inode	*inode = mapping->host;
+		size_t		isize = i_size_read(inode);
 
 		xfs_vm_write_failed(inode, page, pos, len);
 		unlock_page(page);
 
-		if (pos + len > i_size_read(inode))
-			truncate_pagecache(inode, i_size_read(inode));
+		/*
+		 * If the write is beyond EOF, we only want to kill blocks
+		 * allocated in this write, not blocks that were previously
+		 * written successfully.
+		 */
+		if (pos + len > isize) {
+			ssize_t start = max_t(ssize_t, pos, isize);
+
+			truncate_pagecache_range(inode, start, pos + len);
+		}
 
 		page_cache_release(page);
 		page = NULL;
@@ -1615,9 +1648,12 @@ xfs_vm_write_begin(
 }
 
 /*
- * On failure, we only need to kill delalloc blocks beyond EOF because they
- * will never be written. For blocks within EOF, generic_write_end() zeros them
- * so they are safe to leave alone and be written with all the other valid data.
+ * On failure, we only need to kill delalloc blocks beyond EOF in the range of
+ * this specific write because they will never be written. Previous writes
+ * beyond EOF where block allocation succeeded do not need to be trashed, so
+ * only new blocks from this write should be trashed. For blocks within
+ * EOF, generic_write_end() zeros them so they are safe to leave alone and be
+ * written with all the other valid data.
  */
 STATIC int
 xfs_vm_write_end(
@@ -1640,8 +1676,11 @@ xfs_vm_write_end(
 		loff_t		to = pos + len;
 
 		if (to > isize) {
-			truncate_pagecache(inode, isize);
+			/* only kill blocks in this write beyond EOF */
+			if (pos > isize)
+				isize = pos;
 			xfs_vm_kill_delalloc_range(inode, isize, to);
+			truncate_pagecache_range(inode, isize, to);
 		}
 	}
 	return ret;
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 01b6a01..abda112 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -213,7 +213,7 @@ xfs_attr_calc_size(
 		 * Out of line attribute, cannot double split, but
 		 * make room for the attribute value itself.
 		 */
-		uint	dblocks = XFS_B_TO_FSB(mp, valuelen);
+		uint	dblocks = xfs_attr3_rmt_blocks(mp, valuelen);
 		nblks += dblocks;
 		nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
 	}
@@ -698,11 +698,22 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 
 		trace_xfs_attr_leaf_replace(args);
 
+		/* save the attribute state for later removal*/
 		args->op_flags |= XFS_DA_OP_RENAME;	/* an atomic rename */
 		args->blkno2 = args->blkno;		/* set 2nd entry info*/
 		args->index2 = args->index;
 		args->rmtblkno2 = args->rmtblkno;
 		args->rmtblkcnt2 = args->rmtblkcnt;
+		args->rmtvaluelen2 = args->rmtvaluelen;
+
+		/*
+		 * clear the remote attr state now that it is saved so that the
+		 * values reflect the state of the attribute we are about to
+		 * add, not the attribute we just found and will remove later.
+		 */
+		args->rmtblkno = 0;
+		args->rmtblkcnt = 0;
+		args->rmtvaluelen = 0;
 	}
 
 	/*
@@ -794,6 +805,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 		args->blkno = args->blkno2;
 		args->rmtblkno = args->rmtblkno2;
 		args->rmtblkcnt = args->rmtblkcnt2;
+		args->rmtvaluelen = args->rmtvaluelen2;
 		if (args->rmtblkno) {
 			error = xfs_attr_rmtval_remove(args);
 			if (error)
@@ -999,13 +1011,22 @@ restart:
 
 		trace_xfs_attr_node_replace(args);
 
+		/* save the attribute state for later removal*/
 		args->op_flags |= XFS_DA_OP_RENAME;	/* atomic rename op */
 		args->blkno2 = args->blkno;		/* set 2nd entry info*/
 		args->index2 = args->index;
 		args->rmtblkno2 = args->rmtblkno;
 		args->rmtblkcnt2 = args->rmtblkcnt;
+		args->rmtvaluelen2 = args->rmtvaluelen;
+
+		/*
+		 * clear the remote attr state now that it is saved so that the
+		 * values reflect the state of the attribute we are about to
+		 * add, not the attribute we just found and will remove later.
+		 */
 		args->rmtblkno = 0;
 		args->rmtblkcnt = 0;
+		args->rmtvaluelen = 0;
 	}
 
 	retval = xfs_attr3_leaf_add(blk->bp, state->args);
@@ -1133,6 +1154,7 @@ restart:
 		args->blkno = args->blkno2;
 		args->rmtblkno = args->rmtblkno2;
 		args->rmtblkcnt = args->rmtblkcnt2;
+		args->rmtvaluelen = args->rmtvaluelen2;
 		if (args->rmtblkno) {
 			error = xfs_attr_rmtval_remove(args);
 			if (error)
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index fe9587f..511c2834 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -1229,6 +1229,7 @@ xfs_attr3_leaf_add_work(
 		name_rmt->valueblk = 0;
 		args->rmtblkno = 1;
 		args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
+		args->rmtvaluelen = args->valuelen;
 	}
 	xfs_trans_log_buf(args->trans, bp,
 	     XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
@@ -2167,11 +2168,11 @@ xfs_attr3_leaf_lookup_int(
 			if (!xfs_attr_namesp_match(args->flags, entry->flags))
 				continue;
 			args->index = probe;
-			args->valuelen = be32_to_cpu(name_rmt->valuelen);
+			args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
 			args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
 			args->rmtblkcnt = xfs_attr3_rmt_blocks(
 							args->dp->i_mount,
-							args->valuelen);
+							args->rmtvaluelen);
 			return XFS_ERROR(EEXIST);
 		}
 	}
@@ -2220,19 +2221,19 @@ xfs_attr3_leaf_getvalue(
 		name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
 		ASSERT(name_rmt->namelen == args->namelen);
 		ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
-		valuelen = be32_to_cpu(name_rmt->valuelen);
+		args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
 		args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
 		args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount,
-						       valuelen);
+						       args->rmtvaluelen);
 		if (args->flags & ATTR_KERNOVAL) {
-			args->valuelen = valuelen;
+			args->valuelen = args->rmtvaluelen;
 			return 0;
 		}
-		if (args->valuelen < valuelen) {
-			args->valuelen = valuelen;
+		if (args->valuelen < args->rmtvaluelen) {
+			args->valuelen = args->rmtvaluelen;
 			return XFS_ERROR(ERANGE);
 		}
-		args->valuelen = valuelen;
+		args->valuelen = args->rmtvaluelen;
 	}
 	return 0;
 }
@@ -2519,7 +2520,7 @@ xfs_attr3_leaf_clearflag(
 		ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0);
 		name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
 		name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
-		name_rmt->valuelen = cpu_to_be32(args->valuelen);
+		name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen);
 		xfs_trans_log_buf(args->trans, bp,
 			 XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt)));
 	}
@@ -2677,7 +2678,7 @@ xfs_attr3_leaf_flipflags(
 		ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0);
 		name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index);
 		name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
-		name_rmt->valuelen = cpu_to_be32(args->valuelen);
+		name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen);
 		xfs_trans_log_buf(args->trans, bp1,
 			 XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt)));
 	}
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 01db96f..833fe5d9 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -447,6 +447,7 @@ xfs_attr3_leaf_list_int(
 				args.dp = context->dp;
 				args.whichfork = XFS_ATTR_FORK;
 				args.valuelen = valuelen;
+				args.rmtvaluelen = valuelen;
 				args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
 				args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
 				args.rmtblkcnt = xfs_attr3_rmt_blocks(
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index 6e37823..0f0679a 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -68,7 +68,6 @@ xfs_attr3_rmt_blocks(
  */
 static bool
 xfs_attr3_rmt_hdr_ok(
-	struct xfs_mount	*mp,
 	void			*ptr,
 	xfs_ino_t		ino,
 	uint32_t		offset,
@@ -251,7 +250,7 @@ xfs_attr_rmtval_copyout(
 		byte_cnt = min(*valuelen, byte_cnt);
 
 		if (xfs_sb_version_hascrc(&mp->m_sb)) {
-			if (!xfs_attr3_rmt_hdr_ok(mp, src, ino, *offset,
+			if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset,
 						  byte_cnt, bno)) {
 				xfs_alert(mp,
 "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
@@ -337,7 +336,7 @@ xfs_attr_rmtval_get(
 	struct xfs_buf		*bp;
 	xfs_dablk_t		lblkno = args->rmtblkno;
 	__uint8_t		*dst = args->value;
-	int			valuelen = args->valuelen;
+	int			valuelen;
 	int			nmap;
 	int			error;
 	int			blkcnt = args->rmtblkcnt;
@@ -347,7 +346,9 @@ xfs_attr_rmtval_get(
 	trace_xfs_attr_rmtval_get(args);
 
 	ASSERT(!(args->flags & ATTR_KERNOVAL));
+	ASSERT(args->rmtvaluelen == args->valuelen);
 
+	valuelen = args->rmtvaluelen;
 	while (valuelen > 0) {
 		nmap = ATTR_RMTVALUE_MAPSIZE;
 		error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
@@ -415,7 +416,7 @@ xfs_attr_rmtval_set(
 	 * attributes have headers, we can't just do a straight byte to FSB
 	 * conversion and have to take the header space into account.
 	 */
-	blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
+	blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen);
 	error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
 						   XFS_ATTR_FORK);
 	if (error)
@@ -480,7 +481,7 @@ xfs_attr_rmtval_set(
 	 */
 	lblkno = args->rmtblkno;
 	blkcnt = args->rmtblkcnt;
-	valuelen = args->valuelen;
+	valuelen = args->rmtvaluelen;
 	while (valuelen > 0) {
 		struct xfs_buf	*bp;
 		xfs_daddr_t	dblkno;
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 5b6092e..1ff0da6 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -94,7 +94,7 @@ xfs_bmap_compute_maxlevels(
 		maxleafents = MAXAEXTNUM;
 		sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
 	}
-	maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0);
+	maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
 	minleafrecs = mp->m_bmap_dmnr[0];
 	minnoderecs = mp->m_bmap_dmnr[1];
 	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
@@ -233,7 +233,6 @@ xfs_default_attroffset(
  */
 STATIC void
 xfs_bmap_forkoff_reset(
-	xfs_mount_t	*mp,
 	xfs_inode_t	*ip,
 	int		whichfork)
 {
@@ -905,7 +904,7 @@ xfs_bmap_local_to_extents_empty(
 	ASSERT(ifp->if_bytes == 0);
 	ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
 
-	xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork);
+	xfs_bmap_forkoff_reset(ip, whichfork);
 	ifp->if_flags &= ~XFS_IFINLINE;
 	ifp->if_flags |= XFS_IFEXTENTS;
 	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
@@ -1675,7 +1674,6 @@ xfs_bmap_isaeof(
  */
 int
 xfs_bmap_last_offset(
-	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
 	xfs_fileoff_t		*last_block,
 	int			whichfork)
@@ -3517,6 +3515,67 @@ xfs_bmap_adjacent(
 #undef ISVALID
 }
 
+static int
+xfs_bmap_longest_free_extent(
+	struct xfs_trans	*tp,
+	xfs_agnumber_t		ag,
+	xfs_extlen_t		*blen,
+	int			*notinit)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_perag	*pag;
+	xfs_extlen_t		longest;
+	int			error = 0;
+
+	pag = xfs_perag_get(mp, ag);
+	if (!pag->pagf_init) {
+		error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
+		if (error)
+			goto out;
+
+		if (!pag->pagf_init) {
+			*notinit = 1;
+			goto out;
+		}
+	}
+
+	longest = xfs_alloc_longest_free_extent(mp, pag);
+	if (*blen < longest)
+		*blen = longest;
+
+out:
+	xfs_perag_put(pag);
+	return error;
+}
+
+static void
+xfs_bmap_select_minlen(
+	struct xfs_bmalloca	*ap,
+	struct xfs_alloc_arg	*args,
+	xfs_extlen_t		*blen,
+	int			notinit)
+{
+	if (notinit || *blen < ap->minlen) {
+		/*
+		 * Since we did a BUF_TRYLOCK above, it is possible that
+		 * there is space for this request.
+		 */
+		args->minlen = ap->minlen;
+	} else if (*blen < args->maxlen) {
+		/*
+		 * If the best seen length is less than the request length,
+		 * use the best as the minimum.
+		 */
+		args->minlen = *blen;
+	} else {
+		/*
+		 * Otherwise we've seen an extent as big as maxlen, use that
+		 * as the minimum.
+		 */
+		args->minlen = args->maxlen;
+	}
+}
+
 STATIC int
 xfs_bmap_btalloc_nullfb(
 	struct xfs_bmalloca	*ap,
@@ -3524,111 +3583,74 @@ xfs_bmap_btalloc_nullfb(
 	xfs_extlen_t		*blen)
 {
 	struct xfs_mount	*mp = ap->ip->i_mount;
-	struct xfs_perag	*pag;
 	xfs_agnumber_t		ag, startag;
 	int			notinit = 0;
 	int			error;
 
-	if (ap->userdata && xfs_inode_is_filestream(ap->ip))
-		args->type = XFS_ALLOCTYPE_NEAR_BNO;
-	else
-		args->type = XFS_ALLOCTYPE_START_BNO;
+	args->type = XFS_ALLOCTYPE_START_BNO;
 	args->total = ap->total;
 
-	/*
-	 * Search for an allocation group with a single extent large enough
-	 * for the request.  If one isn't found, then adjust the minimum
-	 * allocation size to the largest space found.
-	 */
 	startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
 	if (startag == NULLAGNUMBER)
 		startag = ag = 0;
 
-	pag = xfs_perag_get(mp, ag);
 	while (*blen < args->maxlen) {
-		if (!pag->pagf_init) {
-			error = xfs_alloc_pagf_init(mp, args->tp, ag,
-						    XFS_ALLOC_FLAG_TRYLOCK);
-			if (error) {
-				xfs_perag_put(pag);
-				return error;
-			}
-		}
-
-		/*
-		 * See xfs_alloc_fix_freelist...
-		 */
-		if (pag->pagf_init) {
-			xfs_extlen_t	longest;
-			longest = xfs_alloc_longest_free_extent(mp, pag);
-			if (*blen < longest)
-				*blen = longest;
-		} else
-			notinit = 1;
-
-		if (xfs_inode_is_filestream(ap->ip)) {
-			if (*blen >= args->maxlen)
-				break;
-
-			if (ap->userdata) {
-				/*
-				 * If startag is an invalid AG, we've
-				 * come here once before and
-				 * xfs_filestream_new_ag picked the
-				 * best currently available.
-				 *
-				 * Don't continue looping, since we
-				 * could loop forever.
-				 */
-				if (startag == NULLAGNUMBER)
-					break;
-
-				error = xfs_filestream_new_ag(ap, &ag);
-				xfs_perag_put(pag);
-				if (error)
-					return error;
+		error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
+						     &notinit);
+		if (error)
+			return error;
 
-				/* loop again to set 'blen'*/
-				startag = NULLAGNUMBER;
-				pag = xfs_perag_get(mp, ag);
-				continue;
-			}
-		}
 		if (++ag == mp->m_sb.sb_agcount)
 			ag = 0;
 		if (ag == startag)
 			break;
-		xfs_perag_put(pag);
-		pag = xfs_perag_get(mp, ag);
 	}
-	xfs_perag_put(pag);
 
-	/*
-	 * Since the above loop did a BUF_TRYLOCK, it is
-	 * possible that there is space for this request.
-	 */
-	if (notinit || *blen < ap->minlen)
-		args->minlen = ap->minlen;
-	/*
-	 * If the best seen length is less than the request
-	 * length, use the best as the minimum.
-	 */
-	else if (*blen < args->maxlen)
-		args->minlen = *blen;
-	/*
-	 * Otherwise we've seen an extent as big as maxlen,
-	 * use that as the minimum.
-	 */
-	else
-		args->minlen = args->maxlen;
+	xfs_bmap_select_minlen(ap, args, blen, notinit);
+	return 0;
+}
+
+STATIC int
+xfs_bmap_btalloc_filestreams(
+	struct xfs_bmalloca	*ap,
+	struct xfs_alloc_arg	*args,
+	xfs_extlen_t		*blen)
+{
+	struct xfs_mount	*mp = ap->ip->i_mount;
+	xfs_agnumber_t		ag;
+	int			notinit = 0;
+	int			error;
+
+	args->type = XFS_ALLOCTYPE_NEAR_BNO;
+	args->total = ap->total;
+
+	ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
+	if (ag == NULLAGNUMBER)
+		ag = 0;
+
+	error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
+	if (error)
+		return error;
+
+	if (*blen < args->maxlen) {
+		error = xfs_filestream_new_ag(ap, &ag);
+		if (error)
+			return error;
+
+		error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
+						     &notinit);
+		if (error)
+			return error;
+
+	}
+
+	xfs_bmap_select_minlen(ap, args, blen, notinit);
 
 	/*
-	 * set the failure fallback case to look in the selected
-	 * AG as the stream may have moved.
+	 * Set the failure fallback case to look in the selected AG as stream
+	 * may have moved.
 	 */
-	if (xfs_inode_is_filestream(ap->ip))
-		ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
-
+	ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
 	return 0;
 }
 
@@ -3708,7 +3730,15 @@ xfs_bmap_btalloc(
 	args.firstblock = *ap->firstblock;
 	blen = 0;
 	if (nullfb) {
-		error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
+		/*
+		 * Search for an allocation group with a single extent large
+		 * enough for the request.  If one isn't found, then adjust
+		 * the minimum allocation size to the largest space found.
+		 */
+		if (ap->userdata && xfs_inode_is_filestream(ap->ip))
+			error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
+		else
+			error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
 		if (error)
 			return error;
 	} else if (ap->flist->xbf_low) {
@@ -5413,6 +5443,7 @@ xfs_bmap_shift_extents(
 	int				whichfork = XFS_DATA_FORK;
 	int				logflags;
 	xfs_filblks_t			blockcount = 0;
+	int				total_extents;
 
 	if (unlikely(XFS_TEST_ERROR(
 	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
@@ -5429,7 +5460,6 @@ xfs_bmap_shift_extents(
 	ASSERT(current_ext != NULL);
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
-
 	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
 		/* Read in all the extents */
 		error = xfs_iread_extents(tp, ip, whichfork);
@@ -5456,7 +5486,6 @@ xfs_bmap_shift_extents(
 
 	/* We are going to change core inode */
 	logflags = XFS_ILOG_CORE;
-
 	if (ifp->if_flags & XFS_IFBROOT) {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 		cur->bc_private.b.firstblock = *firstblock;
@@ -5467,8 +5496,14 @@ xfs_bmap_shift_extents(
 		logflags |= XFS_ILOG_DEXT;
 	}
 
-	while (nexts++ < num_exts &&
-	       *current_ext <  XFS_IFORK_NEXTENTS(ip, whichfork)) {
+	/*
+	 * There may be delalloc extents in the data fork before the range we
+	 * are collapsing out, so we cannot
+	 * use the count of real extents here. Instead we have to calculate it
+	 * from the incore fork.
+	 */
+	total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+	while (nexts++ < num_exts && *current_ext < total_extents) {
 
 		gotp = xfs_iext_get_ext(ifp, *current_ext);
 		xfs_bmbt_get_all(gotp, &got);
@@ -5556,10 +5591,11 @@ xfs_bmap_shift_extents(
 		}
 
 		(*current_ext)++;
+		total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
 	}
 
 	/* Check if we are done */
-	if (*current_ext ==  XFS_IFORK_NEXTENTS(ip, whichfork))
+	if (*current_ext == total_extents)
 		*done = 1;
 
 del_cursor:
@@ -5568,6 +5604,5 @@ del_cursor:
 			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
 
 	xfs_trans_log_inode(tp, ip, logflags);
-
 	return error;
 }
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index f84bd7a..38ba36e 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -156,8 +156,8 @@ int	xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
 int	xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t *last_block, int whichfork);
-int	xfs_bmap_last_offset(struct xfs_trans *tp, struct xfs_inode *ip,
-		xfs_fileoff_t *unused, int whichfork);
+int	xfs_bmap_last_offset(struct xfs_inode *ip, xfs_fileoff_t *unused,
+		int whichfork);
 int	xfs_bmap_one_block(struct xfs_inode *ip, int whichfork);
 int	xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip,
 		int whichfork);
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 818d546..948836c 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -84,7 +84,7 @@ xfs_bmdr_to_bmbt(
 	rblock->bb_level = dblock->bb_level;
 	ASSERT(be16_to_cpu(rblock->bb_level) > 0);
 	rblock->bb_numrecs = dblock->bb_numrecs;
-	dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
+	dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
 	fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
 	tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
 	fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
@@ -443,7 +443,7 @@ xfs_bmbt_to_bmdr(
 	ASSERT(rblock->bb_level != 0);
 	dblock->bb_level = rblock->bb_level;
 	dblock->bb_numrecs = rblock->bb_numrecs;
-	dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
+	dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
 	fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
 	tkp = XFS_BMDR_KEY_ADDR(dblock, 1);
 	fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
@@ -519,7 +519,6 @@ xfs_bmbt_alloc_block(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_ptr	*start,
 	union xfs_btree_ptr	*new,
-	int			length,
 	int			*stat)
 {
 	xfs_alloc_arg_t		args;		/* block allocation args */
@@ -672,8 +671,7 @@ xfs_bmbt_get_dmaxrecs(
 {
 	if (level != cur->bc_nlevels - 1)
 		return cur->bc_mp->m_bmap_dmxr[level != 0];
-	return xfs_bmdr_maxrecs(cur->bc_mp, cur->bc_private.b.forksize,
-				level == 0);
+	return xfs_bmdr_maxrecs(cur->bc_private.b.forksize, level == 0);
 }
 
 STATIC void
@@ -914,7 +912,6 @@ xfs_bmbt_maxrecs(
  */
 int
 xfs_bmdr_maxrecs(
-	struct xfs_mount	*mp,
 	int			blocklen,
 	int			leaf)
 {
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 6e42e1e..819a8a4 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -130,7 +130,7 @@ extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int,
 			xfs_bmdr_block_t *, int);
 
 extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level);
-extern int xfs_bmdr_maxrecs(struct xfs_mount *, int blocklen, int leaf);
+extern int xfs_bmdr_maxrecs(int blocklen, int leaf);
 extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf);
 
 extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 01f6a64..296160b 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1418,6 +1418,8 @@ xfs_zero_file_space(
 	xfs_off_t		end_boundary;
 	int			error;
 
+	trace_xfs_zero_file_space(ip);
+
 	granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
 
 	/*
@@ -1432,9 +1434,18 @@ xfs_zero_file_space(
 	ASSERT(end_boundary <= offset + len);
 
 	if (start_boundary < end_boundary - 1) {
-		/* punch out the page cache over the conversion range */
+		/*
+		 * punch out delayed allocation blocks and the page cache over
+		 * the conversion range
+		 */
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		error = xfs_bmap_punch_delalloc_range(ip,
+				XFS_B_TO_FSBT(mp, start_boundary),
+				XFS_B_TO_FSB(mp, end_boundary - start_boundary));
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		truncate_pagecache_range(VFS_I(ip), start_boundary,
 					 end_boundary - 1);
+
 		/* convert the blocks */
 		error = xfs_alloc_file_space(ip, start_boundary,
 					end_boundary - start_boundary - 1,
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index b920745..182bac2 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -1161,7 +1161,6 @@ STATIC int
 xfs_btree_read_buf_block(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_ptr	*ptr,
-	int			level,
 	int			flags,
 	struct xfs_btree_block	**block,
 	struct xfs_buf		**bpp)
@@ -1519,8 +1518,8 @@ xfs_btree_increment(
 		union xfs_btree_ptr	*ptrp;
 
 		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
-		error = xfs_btree_read_buf_block(cur, ptrp, --lev,
-							0, &block, &bp);
+		--lev;
+		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
 		if (error)
 			goto error0;
 
@@ -1618,8 +1617,8 @@ xfs_btree_decrement(
 		union xfs_btree_ptr	*ptrp;
 
 		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
-		error = xfs_btree_read_buf_block(cur, ptrp, --lev,
-							0, &block, &bp);
+		--lev;
+		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
 		if (error)
 			goto error0;
 		xfs_btree_setbuf(cur, lev, bp);
@@ -1669,7 +1668,7 @@ xfs_btree_lookup_get_block(
 		return 0;
 	}
 
-	error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
+	error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
 	if (error)
 		return error;
 
@@ -2020,7 +2019,7 @@ xfs_btree_lshift(
 		goto out0;
 
 	/* Set up the left neighbor as "left". */
-	error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
+	error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
 	if (error)
 		goto error0;
 
@@ -2204,7 +2203,7 @@ xfs_btree_rshift(
 		goto out0;
 
 	/* Set up the right neighbor as "right". */
-	error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
+	error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
 	if (error)
 		goto error0;
 
@@ -2374,7 +2373,7 @@ xfs_btree_split(
 	xfs_btree_buf_to_ptr(cur, lbp, &lptr);
 
 	/* Allocate the new block. If we can't do it, we're toast. Give up. */
-	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
+	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
 	if (error)
 		goto error0;
 	if (*stat == 0)
@@ -2472,7 +2471,7 @@ xfs_btree_split(
 	 * point back to right instead of to left.
 	 */
 	if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
-		error = xfs_btree_read_buf_block(cur, &rrptr, level,
+		error = xfs_btree_read_buf_block(cur, &rrptr,
 							0, &rrblock, &rrbp);
 		if (error)
 			goto error0;
@@ -2547,7 +2546,7 @@ xfs_btree_new_iroot(
 	pp = xfs_btree_ptr_addr(cur, 1, block);
 
 	/* Allocate the new block. If we can't do it, we're toast. Give up. */
-	error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat);
+	error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
 	if (error)
 		goto error0;
 	if (*stat == 0) {
@@ -2651,7 +2650,7 @@ xfs_btree_new_root(
 	cur->bc_ops->init_ptr_from_cur(cur, &rptr);
 
 	/* Allocate the new block. If we can't do it, we're toast. Give up. */
-	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat);
+	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
 	if (error)
 		goto error0;
 	if (*stat == 0)
@@ -2686,8 +2685,7 @@ xfs_btree_new_root(
 		lbp = bp;
 		xfs_btree_buf_to_ptr(cur, lbp, &lptr);
 		left = block;
-		error = xfs_btree_read_buf_block(cur, &rptr,
-					cur->bc_nlevels - 1, 0, &right, &rbp);
+		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
 		if (error)
 			goto error0;
 		bp = rbp;
@@ -2698,8 +2696,7 @@ xfs_btree_new_root(
 		xfs_btree_buf_to_ptr(cur, rbp, &rptr);
 		right = block;
 		xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
-		error = xfs_btree_read_buf_block(cur, &lptr,
-					cur->bc_nlevels - 1, 0, &left, &lbp);
+		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
 		if (error)
 			goto error0;
 		bp = lbp;
@@ -3651,8 +3648,7 @@ xfs_btree_delrec(
 		rptr = cptr;
 		right = block;
 		rbp = bp;
-		error = xfs_btree_read_buf_block(cur, &lptr, level,
-							0, &left, &lbp);
+		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
 		if (error)
 			goto error0;
 
@@ -3669,8 +3665,7 @@ xfs_btree_delrec(
 		lptr = cptr;
 		left = block;
 		lbp = bp;
-		error = xfs_btree_read_buf_block(cur, &rptr, level,
-							0, &right, &rbp);
+		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
 		if (error)
 			goto error0;
 
@@ -3742,8 +3737,7 @@ xfs_btree_delrec(
 	/* If there is a right sibling, point it to the remaining block. */
 	xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
 	if (!xfs_btree_ptr_is_null(cur, &cptr)) {
-		error = xfs_btree_read_buf_block(cur, &cptr, level,
-							0, &rrblock, &rrbp);
+		error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
 		if (error)
 			goto error0;
 		xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index d2ac586..a04b694 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -132,7 +132,7 @@ struct xfs_btree_ops {
 	int	(*alloc_block)(struct xfs_btree_cur *cur,
 			       union xfs_btree_ptr *start_bno,
 			       union xfs_btree_ptr *new_bno,
-			       int length, int *stat);
+			       int *stat);
 	int	(*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp);
 
 	/* update last record information */
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 107f2fd..7a34a1a 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -216,8 +216,7 @@ _xfs_buf_alloc(
 STATIC int
 _xfs_buf_get_pages(
 	xfs_buf_t		*bp,
-	int			page_count,
-	xfs_buf_flags_t		flags)
+	int			page_count)
 {
 	/* Make sure that we have a page list */
 	if (bp->b_pages == NULL) {
@@ -330,7 +329,7 @@ use_alloc_page:
 	end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
 								>> PAGE_SHIFT;
 	page_count = end - start;
-	error = _xfs_buf_get_pages(bp, page_count, flags);
+	error = _xfs_buf_get_pages(bp, page_count);
 	if (unlikely(error))
 		return error;
 
@@ -778,7 +777,7 @@ xfs_buf_associate_memory(
 	bp->b_pages = NULL;
 	bp->b_addr = mem;
 
-	rval = _xfs_buf_get_pages(bp, page_count, 0);
+	rval = _xfs_buf_get_pages(bp, page_count);
 	if (rval)
 		return rval;
 
@@ -811,7 +810,7 @@ xfs_buf_get_uncached(
 		goto fail;
 
 	page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
-	error = _xfs_buf_get_pages(bp, page_count, 0);
+	error = _xfs_buf_get_pages(bp, page_count);
 	if (error)
 		goto fail_free_buf;
 
@@ -1372,21 +1371,29 @@ xfs_buf_iorequest(
 		xfs_buf_wait_unpin(bp);
 	xfs_buf_hold(bp);
 
-	/* Set the count to 1 initially, this will stop an I/O
+	/*
+	 * Set the count to 1 initially, this will stop an I/O
 	 * completion callout which happens before we have started
 	 * all the I/O from calling xfs_buf_ioend too early.
 	 */
 	atomic_set(&bp->b_io_remaining, 1);
 	_xfs_buf_ioapply(bp);
-	_xfs_buf_ioend(bp, 1);
+	/*
+	 * If _xfs_buf_ioapply failed, we'll get back here with
+	 * only the reference we took above.  _xfs_buf_ioend will
+	 * drop it to zero, so we'd better not queue it for later,
+	 * or we'll free it before it's done.
+	 */
+	_xfs_buf_ioend(bp, bp->b_error ? 0 : 1);
 
 	xfs_buf_rele(bp);
 }
 
 /*
  * Waits for I/O to complete on the buffer supplied.  It returns immediately if
- * no I/O is pending or there is already a pending error on the buffer.  It
- * returns the I/O error code, if any, or 0 if there was no error.
+ * no I/O is pending or there is already a pending error on the buffer, in which
+ * case nothing will ever complete.  It returns the I/O error code, if any, or
+ * 0 if there was no error.
  */
 int
 xfs_buf_iowait(
@@ -1607,7 +1614,6 @@ xfs_free_buftarg(
 int
 xfs_setsize_buftarg(
 	xfs_buftarg_t		*btp,
-	unsigned int		blocksize,
 	unsigned int		sectorsize)
 {
 	/* Set up metadata sector size info */
@@ -1642,16 +1648,13 @@ xfs_setsize_buftarg_early(
 	xfs_buftarg_t		*btp,
 	struct block_device	*bdev)
 {
-	return xfs_setsize_buftarg(btp, PAGE_SIZE,
-				   bdev_logical_block_size(bdev));
+	return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
 }
 
 xfs_buftarg_t *
 xfs_alloc_buftarg(
 	struct xfs_mount	*mp,
-	struct block_device	*bdev,
-	int			external,
-	const char		*fsname)
+	struct block_device	*bdev)
 {
 	xfs_buftarg_t		*btp;
 
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index b8a3abf..0e47fd1 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -387,10 +387,10 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
  *	Handling of buftargs.
  */
 extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
-			struct block_device *, int, const char *);
+			struct block_device *);
 extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
 extern void xfs_wait_buftarg(xfs_buftarg_t *);
-extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
+extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int);
 
 #define xfs_getsize_buftarg(buftarg)	block_size((buftarg)->bt_bdev)
 #define xfs_readonly_buftarg(buftarg)	bdev_read_only((buftarg)->bt_bdev)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 8752821..64b17f5 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -812,7 +812,6 @@ xfs_buf_item_init(
  */
 static void
 xfs_buf_item_log_segment(
-	struct xfs_buf_log_item	*bip,
 	uint			first,
 	uint			last,
 	uint			*map)
@@ -920,7 +919,7 @@ xfs_buf_item_log(
 		if (end > last)
 			end = last;
 
-		xfs_buf_item_log_segment(bip, first, end,
+		xfs_buf_item_log_segment(first, end,
 					 &bip->bli_formats[i].blf_data_map[0]);
 
 		start += bp->b_maps[i].bm_len;
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 6cc5f67..9eec594 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -2462,7 +2462,6 @@ xfs_buf_map_from_irec(
  */
 static int
 xfs_dabuf_map(
-	struct xfs_trans	*trans,
 	struct xfs_inode	*dp,
 	xfs_dablk_t		bno,
 	xfs_daddr_t		mappedbno,
@@ -2558,7 +2557,7 @@ xfs_da_get_buf(
 	*bpp = NULL;
 	mapp = &map;
 	nmap = 1;
-	error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork,
+	error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
 				&mapp, &nmap);
 	if (error) {
 		/* mapping a hole is not an error, but we don't continue */
@@ -2606,7 +2605,7 @@ xfs_da_read_buf(
 	*bpp = NULL;
 	mapp = &map;
 	nmap = 1;
-	error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork,
+	error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
 				&mapp, &nmap);
 	if (error) {
 		/* mapping a hole is not an error, but we don't continue */
@@ -2679,7 +2678,6 @@ out_free:
  */
 xfs_daddr_t
 xfs_da_reada_buf(
-	struct xfs_trans	*trans,
 	struct xfs_inode	*dp,
 	xfs_dablk_t		bno,
 	xfs_daddr_t		mappedbno,
@@ -2693,7 +2691,7 @@ xfs_da_reada_buf(
 
 	mapp = &map;
 	nmap = 1;
-	error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork,
+	error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
 				&mapp, &nmap);
 	if (error) {
 		/* mapping a hole is not an error, but we don't continue */
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 6e95ea7..c824a0a 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -60,10 +60,12 @@ typedef struct xfs_da_args {
 	int		index;		/* index of attr of interest in blk */
 	xfs_dablk_t	rmtblkno;	/* remote attr value starting blkno */
 	int		rmtblkcnt;	/* remote attr value block count */
+	int		rmtvaluelen;	/* remote attr value length in bytes */
 	xfs_dablk_t	blkno2;		/* blkno of 2nd attr leaf of interest */
 	int		index2;		/* index of 2nd attr in blk */
 	xfs_dablk_t	rmtblkno2;	/* remote attr value starting blkno */
 	int		rmtblkcnt2;	/* remote attr value block count */
+	int		rmtvaluelen2;	/* remote attr value length in bytes */
 	int		op_flags;	/* operation flags */
 	enum xfs_dacmp	cmpresult;	/* name compare result for lookups */
 } xfs_da_args_t;
@@ -183,9 +185,9 @@ int	xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp,
 			       xfs_dablk_t bno, xfs_daddr_t mappedbno,
 			       struct xfs_buf **bpp, int whichfork,
 			       const struct xfs_buf_ops *ops);
-xfs_daddr_t	xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp,
-				xfs_dablk_t bno, xfs_daddr_t mapped_bno,
-				int whichfork, const struct xfs_buf_ops *ops);
+xfs_daddr_t	xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
+				xfs_daddr_t mapped_bno, int whichfork,
+				const struct xfs_buf_ops *ops);
 int	xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
 					  struct xfs_buf *dead_buf);
 
diff --git a/fs/xfs/xfs_da_format.h b/fs/xfs/xfs_da_format.h
index a19d3f8..1432b57 100644
--- a/fs/xfs/xfs_da_format.h
+++ b/fs/xfs/xfs_da_format.h
@@ -541,7 +541,7 @@ xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp)
  * Convert dataptr to byte in file space
  */
 static inline xfs_dir2_off_t
-xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
+xfs_dir2_dataptr_to_byte(xfs_dir2_dataptr_t dp)
 {
 	return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG;
 }
@@ -550,7 +550,7 @@ xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
  * Convert byte in file space to dataptr.  It had better be aligned.
  */
 static inline xfs_dir2_dataptr_t
-xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
+xfs_dir2_byte_to_dataptr(xfs_dir2_off_t by)
 {
 	return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG);
 }
@@ -571,7 +571,7 @@ xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
 static inline xfs_dir2_db_t
 xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
 {
-	return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp));
+	return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(dp));
 }
 
 /*
@@ -590,7 +590,7 @@ xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
 static inline xfs_dir2_data_aoff_t
 xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
 {
-	return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp));
+	return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(dp));
 }
 
 /*
@@ -629,7 +629,7 @@ static inline xfs_dir2_dataptr_t
 xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
 			   xfs_dir2_data_aoff_t o)
 {
-	return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o));
+	return xfs_dir2_byte_to_dataptr(xfs_dir2_db_off_to_byte(mp, db, o));
 }
 
 /*
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index fda4625..e365c98 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -244,7 +244,7 @@ xfs_dir_createname(
 		goto out_free;
 	}
 
-	rval = xfs_dir2_isblock(tp, dp, &v);
+	rval = xfs_dir2_isblock(dp, &v);
 	if (rval)
 		goto out_free;
 	if (v) {
@@ -252,7 +252,7 @@ xfs_dir_createname(
 		goto out_free;
 	}
 
-	rval = xfs_dir2_isleaf(tp, dp, &v);
+	rval = xfs_dir2_isleaf(dp, &v);
 	if (rval)
 		goto out_free;
 	if (v)
@@ -336,7 +336,7 @@ xfs_dir_lookup(
 		goto out_check_rval;
 	}
 
-	rval = xfs_dir2_isblock(tp, dp, &v);
+	rval = xfs_dir2_isblock(dp, &v);
 	if (rval)
 		goto out_free;
 	if (v) {
@@ -344,7 +344,7 @@ xfs_dir_lookup(
 		goto out_check_rval;
 	}
 
-	rval = xfs_dir2_isleaf(tp, dp, &v);
+	rval = xfs_dir2_isleaf(dp, &v);
 	if (rval)
 		goto out_free;
 	if (v)
@@ -408,7 +408,7 @@ xfs_dir_removename(
 		goto out_free;
 	}
 
-	rval = xfs_dir2_isblock(tp, dp, &v);
+	rval = xfs_dir2_isblock(dp, &v);
 	if (rval)
 		goto out_free;
 	if (v) {
@@ -416,7 +416,7 @@ xfs_dir_removename(
 		goto out_free;
 	}
 
-	rval = xfs_dir2_isleaf(tp, dp, &v);
+	rval = xfs_dir2_isleaf(dp, &v);
 	if (rval)
 		goto out_free;
 	if (v)
@@ -472,7 +472,7 @@ xfs_dir_replace(
 		goto out_free;
 	}
 
-	rval = xfs_dir2_isblock(tp, dp, &v);
+	rval = xfs_dir2_isblock(dp, &v);
 	if (rval)
 		goto out_free;
 	if (v) {
@@ -480,7 +480,7 @@ xfs_dir_replace(
 		goto out_free;
 	}
 
-	rval = xfs_dir2_isleaf(tp, dp, &v);
+	rval = xfs_dir2_isleaf(dp, &v);
 	if (rval)
 		goto out_free;
 	if (v)
@@ -531,7 +531,7 @@ xfs_dir_canenter(
 		goto out_free;
 	}
 
-	rval = xfs_dir2_isblock(tp, dp, &v);
+	rval = xfs_dir2_isblock(dp, &v);
 	if (rval)
 		goto out_free;
 	if (v) {
@@ -539,7 +539,7 @@ xfs_dir_canenter(
 		goto out_free;
 	}
 
-	rval = xfs_dir2_isleaf(tp, dp, &v);
+	rval = xfs_dir2_isleaf(dp, &v);
 	if (rval)
 		goto out_free;
 	if (v)
@@ -607,7 +607,6 @@ xfs_dir2_grow_inode(
  */
 int
 xfs_dir2_isblock(
-	xfs_trans_t	*tp,
 	xfs_inode_t	*dp,
 	int		*vp)		/* out: 1 is block, 0 is not block */
 {
@@ -616,7 +615,7 @@ xfs_dir2_isblock(
 	int		rval;
 
 	mp = dp->i_mount;
-	if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK)))
+	if ((rval = xfs_bmap_last_offset(dp, &last, XFS_DATA_FORK)))
 		return rval;
 	rval = XFS_FSB_TO_B(mp, last) == mp->m_dirblksize;
 	ASSERT(rval == 0 || dp->i_d.di_size == mp->m_dirblksize);
@@ -629,7 +628,6 @@ xfs_dir2_isblock(
  */
 int
 xfs_dir2_isleaf(
-	xfs_trans_t	*tp,
 	xfs_inode_t	*dp,
 	int		*vp)		/* out: 1 is leaf, 0 is not leaf */
 {
@@ -638,7 +636,7 @@ xfs_dir2_isleaf(
 	int		rval;
 
 	mp = dp->i_mount;
-	if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK)))
+	if ((rval = xfs_bmap_last_offset(dp, &last, XFS_DATA_FORK)))
 		return rval;
 	*vp = last == mp->m_dirleafblk + (1 << mp->m_sb.sb_dirblklog);
 	return 0;
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index cec70e0..64a6b19 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -142,8 +142,8 @@ extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
 /*
  * Interface routines used by userspace utilities
  */
-extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
-extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
+extern int xfs_dir2_isblock(struct xfs_inode *dp, int *r);
+extern int xfs_dir2_isleaf(struct xfs_inode *dp, int *r);
 extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
 				struct xfs_buf *bp);
 
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 4f6a38c..dd9d005 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -319,7 +319,6 @@ xfs_dir2_block_compact(
 		(xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
 		(xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)),
 		needlog, &needscan);
-	blp += be32_to_cpu(btp->stale) - 1;
 	btp->stale = cpu_to_be32(1);
 	/*
 	 * If we now need to rebuild the bestfree map, do so.
@@ -537,7 +536,7 @@ xfs_dir2_block_addname(
 	 * Fill in the leaf entry.
 	 */
 	blp[mid].hashval = cpu_to_be32(args->hashval);
-	blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
+	blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
 				(char *)dep - (char *)hdr));
 	xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
 	/*
@@ -1170,7 +1169,7 @@ xfs_dir2_sf_to_block(
 	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
 	xfs_dir2_data_log_entry(tp, dp, bp, dep);
 	blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
-	blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
+	blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
 				(char *)dep - (char *)hdr));
 	/*
 	 * Create entry for ..
@@ -1184,7 +1183,7 @@ xfs_dir2_sf_to_block(
 	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
 	xfs_dir2_data_log_entry(tp, dp, bp, dep);
 	blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
-	blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
+	blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
 				(char *)dep - (char *)hdr));
 	offset = dp->d_ops->data_first_offset;
 	/*
@@ -1238,7 +1237,7 @@ xfs_dir2_sf_to_block(
 		name.len = sfep->namelen;
 		blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
 							hashname(&name));
-		blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
+		blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
 						 (char *)dep - (char *)hdr));
 		offset = (int)((char *)(tagp + 1) - (char *)hdr);
 		if (++i == sfp->count)
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index afa4ad5..bae8b5b 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -329,12 +329,11 @@ xfs_dir3_data_read(
 
 int
 xfs_dir3_data_readahead(
-	struct xfs_trans	*tp,
 	struct xfs_inode	*dp,
 	xfs_dablk_t		bno,
 	xfs_daddr_t		mapped_bno)
 {
-	return xfs_da_reada_buf(tp, dp, bno, mapped_bno,
+	return xfs_da_reada_buf(dp, bno, mapped_bno,
 				XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops);
 }
 
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index d36e97d..f571723 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -1708,7 +1708,7 @@ xfs_dir2_node_to_leaf(
 	/*
 	 * Get the last offset in the file.
 	 */
-	if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) {
+	if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) {
 		return error;
 	}
 	fo -= mp->m_dirblkfsbs;
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index cb434d7..9cb91ee 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -1727,7 +1727,7 @@ xfs_dir2_node_addname_int(
 	if (dbno == -1) {
 		xfs_fileoff_t	fo;		/* freespace block number */
 
-		if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK)))
+		if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK)))
 			return error;
 		lastfbno = xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo);
 		fbno = ifbno;
diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h
index 8b9d228..2429960 100644
--- a/fs/xfs/xfs_dir2_priv.h
+++ b/fs/xfs/xfs_dir2_priv.h
@@ -54,8 +54,8 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
 extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
 extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
 		xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp);
-extern int xfs_dir3_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp,
-		xfs_dablk_t bno, xfs_daddr_t mapped_bno);
+extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno,
+		xfs_daddr_t mapped_bno);
 
 extern struct xfs_dir2_data_free *
 xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index aead369..50b72f7 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -434,7 +434,7 @@ xfs_dir2_leaf_readbuf(
 		 */
 		if (i > mip->ra_current &&
 		    map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) {
-			xfs_dir3_data_readahead(NULL, dp,
+			xfs_dir3_data_readahead(dp,
 				map[mip->ra_index].br_startoff + mip->ra_offset,
 				XFS_FSB_TO_DADDR(mp,
 					map[mip->ra_index].br_startblock +
@@ -447,7 +447,7 @@ xfs_dir2_leaf_readbuf(
 		 * use our mapping, but this is a very rare case.
 		 */
 		else if (i > mip->ra_current) {
-			xfs_dir3_data_readahead(NULL, dp,
+			xfs_dir3_data_readahead(dp,
 					map[mip->ra_index].br_startoff +
 							mip->ra_offset, -1);
 			mip->ra_current = i;
@@ -531,7 +531,7 @@ xfs_dir2_leaf_getdents(
 	 * Inside the loop we keep the main offset value as a byte offset
 	 * in the directory file.
 	 */
-	curoff = xfs_dir2_dataptr_to_byte(mp, ctx->pos);
+	curoff = xfs_dir2_dataptr_to_byte(ctx->pos);
 
 	/*
 	 * Force this conversion through db so we truncate the offset
@@ -635,7 +635,7 @@ xfs_dir2_leaf_getdents(
 		length = dp->d_ops->data_entsize(dep->namelen);
 		filetype = dp->d_ops->data_get_ftype(dep);
 
-		ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
+		ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
 		if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
 			    be64_to_cpu(dep->inumber),
 			    xfs_dir3_get_dtype(mp, filetype)))
@@ -653,10 +653,10 @@ xfs_dir2_leaf_getdents(
 	/*
 	 * All done.  Set output offset value to current offset.
 	 */
-	if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR))
+	if (curoff > xfs_dir2_dataptr_to_byte(XFS_DIR2_MAX_DATAPTR))
 		ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
 	else
-		ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
+		ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
 	kmem_free(map_info);
 	if (bp)
 		xfs_trans_brelse(NULL, bp);
@@ -687,7 +687,7 @@ xfs_readdir(
 	lock_mode = xfs_ilock_data_map_shared(dp);
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_getdents(dp, ctx);
-	else if ((rval = xfs_dir2_isblock(NULL, dp, &v)))
+	else if ((rval = xfs_dir2_isblock(dp, &v)))
 		;
 	else if (v)
 		rval = xfs_dir2_block_getdents(dp, ctx);
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 3725fb1..7aab8ec 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -285,14 +285,12 @@ int						/* error */
 xfs_dir2_sf_addname(
 	xfs_da_args_t		*args)		/* operation arguments */
 {
-	int			add_entsize;	/* size of the new entry */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			error;		/* error return value */
 	int			incr_isize;	/* total change in size */
 	int			new_isize;	/* di_size after adding name */
 	int			objchange;	/* changing to 8-byte inodes */
 	xfs_dir2_data_aoff_t	offset = 0;	/* offset for new entry */
-	int			old_isize;	/* di_size before adding name */
 	int			pick;		/* which algorithm to use */
 	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
 	xfs_dir2_sf_entry_t	*sfep = NULL;	/* shortform entry */
@@ -316,8 +314,7 @@ xfs_dir2_sf_addname(
 	/*
 	 * Compute entry (and change in) size.
 	 */
-	add_entsize = dp->d_ops->sf_entsize(sfp, args->namelen);
-	incr_isize = add_entsize;
+	incr_isize = dp->d_ops->sf_entsize(sfp, args->namelen);
 	objchange = 0;
 #if XFS_BIG_INUMS
 	/*
@@ -325,11 +322,8 @@ xfs_dir2_sf_addname(
 	 */
 	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
 		/*
-		 * Yes, adjust the entry size and the total size.
+		 * Yes, adjust the inode size.  old count + (parent + new)
 		 */
-		add_entsize +=
-			(uint)sizeof(xfs_dir2_ino8_t) -
-			(uint)sizeof(xfs_dir2_ino4_t);
 		incr_isize +=
 			(sfp->count + 2) *
 			((uint)sizeof(xfs_dir2_ino8_t) -
@@ -337,8 +331,7 @@ xfs_dir2_sf_addname(
 		objchange = 1;
 	}
 #endif
-	old_isize = (int)dp->i_d.di_size;
-	new_isize = old_isize + incr_isize;
+	new_isize = (int)dp->i_d.di_size + incr_isize;
 	/*
 	 * Won't fit as shortform any more (due to size),
 	 * or the pick routine says it won't (due to offset values).
@@ -1110,9 +1103,9 @@ xfs_dir2_sf_toino4(
 }
 
 /*
- * Convert from 4-byte inode numbers to 8-byte inode numbers.
- * The new 8-byte inode number is not there yet, we leave with the
- * count 1 but no corresponding entry.
+ * Convert existing entries from 4-byte inode numbers to 8-byte inode numbers.
+ * The new entry w/ an 8-byte inode number is not there yet; we leave with
+ * i8count set to 1, but no corresponding 8-byte entry.
  */
 static void
 xfs_dir2_sf_toino8(
@@ -1145,7 +1138,7 @@ xfs_dir2_sf_toino8(
 	ASSERT(oldsfp->i8count == 0);
 	memcpy(buf, oldsfp, oldsize);
 	/*
-	 * Compute the new inode size.
+	 * Compute the new inode size (nb: entry count + 1 for parent)
 	 */
 	newsize =
 		oldsize +
diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/xfs_dquot_buf.c
index 610da81..c2ac0c6 100644
--- a/fs/xfs/xfs_dquot_buf.c
+++ b/fs/xfs/xfs_dquot_buf.c
@@ -35,7 +35,6 @@
 
 int
 xfs_calc_dquots_per_chunk(
-	struct xfs_mount	*mp,
 	unsigned int		nbblks)	/* basic block units */
 {
 	unsigned int	ndquots;
@@ -194,7 +193,7 @@ xfs_dquot_buf_verify_crc(
 	if (mp->m_quotainfo)
 		ndquots = mp->m_quotainfo->qi_dqperchunk;
 	else
-		ndquots = xfs_calc_dquots_per_chunk(mp,
+		ndquots = xfs_calc_dquots_per_chunk(
 					XFS_BB_TO_FSB(mp, bp->b_length));
 
 	for (i = 0; i < ndquots; i++, d++) {
@@ -225,7 +224,7 @@ xfs_dquot_buf_verify(
 	if (mp->m_quotainfo)
 		ndquots = mp->m_quotainfo->qi_dqperchunk;
 	else
-		ndquots = xfs_calc_dquots_per_chunk(mp, bp->b_length);
+		ndquots = xfs_calc_dquots_per_chunk(bp->b_length);
 
 	/*
 	 * On the first read of the buffer, verify that each dquot is valid.
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index 1399e18..753e467 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -237,7 +237,7 @@ xfs_fs_nfs_commit_metadata(
 
 	if (!lsn)
 		return 0;
-	return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
+	return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
 }
 
 const struct export_operations xfs_export_operations = {
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 79e96ce..1b8160d 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -155,7 +155,7 @@ xfs_dir_fsync(
 
 	if (!lsn)
 		return 0;
-	return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
+	return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
 }
 
 STATIC int
@@ -295,7 +295,7 @@ xfs_file_aio_read(
 		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
 
 		if (inode->i_mapping->nrpages) {
-			ret = -filemap_write_and_wait_range(
+			ret = filemap_write_and_wait_range(
 							VFS_I(ip)->i_mapping,
 							pos, -1);
 			if (ret) {
@@ -679,7 +679,7 @@ xfs_file_dio_aio_write(
 		goto out;
 
 	if (mapping->nrpages) {
-		ret = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+		ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
 						    pos, -1);
 		if (ret)
 			goto out;
@@ -837,11 +837,19 @@ xfs_file_fallocate(
 		unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
 
 		if (offset & blksize_mask || len & blksize_mask) {
-			error = -EINVAL;
+			error = EINVAL;
+			goto out_unlock;
+		}
+
+		/*
+		 * There is no need to overlap collapse range with EOF,
+		 * in which case it is effectively a truncate operation
+		 */
+		if (offset + len >= i_size_read(inode)) {
+			error = EINVAL;
 			goto out_unlock;
 		}
 
-		ASSERT(offset + len < i_size_read(inode));
 		new_size = i_size_read(inode) - len;
 
 		error = xfs_collapse_file_space(ip, offset, len);
@@ -936,7 +944,7 @@ xfs_dir_open(
 	 */
 	mode = xfs_ilock_data_map_shared(ip);
 	if (ip->i_d.di_nextents > 0)
-		xfs_dir3_data_readahead(NULL, ip, 0, -1);
+		xfs_dir3_data_readahead(ip, 0, -1);
 	xfs_iunlock(ip, mode);
 	return 0;
 }
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 12b6e77..8ec81bed7 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2006-2007 Silicon Graphics, Inc.
+ * Copyright (c) 2014 Christoph Hellwig.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -32,100 +33,20 @@
 #include "xfs_filestream.h"
 #include "xfs_trace.h"
 
-#ifdef XFS_FILESTREAMS_TRACE
-
-ktrace_t *xfs_filestreams_trace_buf;
-
-STATIC void
-xfs_filestreams_trace(
-	xfs_mount_t	*mp,	/* mount point */
-	int		type,	/* type of trace */
-	const char	*func,	/* source function */
-	int		line,	/* source line number */
-	__psunsigned_t	arg0,
-	__psunsigned_t	arg1,
-	__psunsigned_t	arg2,
-	__psunsigned_t	arg3,
-	__psunsigned_t	arg4,
-	__psunsigned_t	arg5)
-{
-	ktrace_enter(xfs_filestreams_trace_buf,
-		(void *)(__psint_t)(type | (line << 16)),
-		(void *)func,
-		(void *)(__psunsigned_t)current_pid(),
-		(void *)mp,
-		(void *)(__psunsigned_t)arg0,
-		(void *)(__psunsigned_t)arg1,
-		(void *)(__psunsigned_t)arg2,
-		(void *)(__psunsigned_t)arg3,
-		(void *)(__psunsigned_t)arg4,
-		(void *)(__psunsigned_t)arg5,
-		NULL, NULL, NULL, NULL, NULL, NULL);
-}
-
-#define TRACE0(mp,t)			TRACE6(mp,t,0,0,0,0,0,0)
-#define TRACE1(mp,t,a0)			TRACE6(mp,t,a0,0,0,0,0,0)
-#define TRACE2(mp,t,a0,a1)		TRACE6(mp,t,a0,a1,0,0,0,0)
-#define TRACE3(mp,t,a0,a1,a2)		TRACE6(mp,t,a0,a1,a2,0,0,0)
-#define TRACE4(mp,t,a0,a1,a2,a3)	TRACE6(mp,t,a0,a1,a2,a3,0,0)
-#define TRACE5(mp,t,a0,a1,a2,a3,a4)	TRACE6(mp,t,a0,a1,a2,a3,a4,0)
-#define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \
-	xfs_filestreams_trace(mp, t, __func__, __LINE__, \
-				(__psunsigned_t)a0, (__psunsigned_t)a1, \
-				(__psunsigned_t)a2, (__psunsigned_t)a3, \
-				(__psunsigned_t)a4, (__psunsigned_t)a5)
-
-#define TRACE_AG_SCAN(mp, ag, ag2) \
-		TRACE2(mp, XFS_FSTRM_KTRACE_AGSCAN, ag, ag2);
-#define TRACE_AG_PICK1(mp, max_ag, maxfree) \
-		TRACE2(mp, XFS_FSTRM_KTRACE_AGPICK1, max_ag, maxfree);
-#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) \
-		TRACE6(mp, XFS_FSTRM_KTRACE_AGPICK2, ag, ag2, \
-			 cnt, free, scan, flag)
-#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) \
-		TRACE5(mp, XFS_FSTRM_KTRACE_UPDATE, ip, ag, cnt, ag2, cnt2)
-#define TRACE_FREE(mp, ip, pip, ag, cnt) \
-		TRACE4(mp, XFS_FSTRM_KTRACE_FREE, ip, pip, ag, cnt)
-#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) \
-		TRACE4(mp, XFS_FSTRM_KTRACE_ITEM_LOOKUP, ip, pip, ag, cnt)
-#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) \
-		TRACE4(mp, XFS_FSTRM_KTRACE_ASSOCIATE, ip, pip, ag, cnt)
-#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) \
-		TRACE6(mp, XFS_FSTRM_KTRACE_MOVEAG, ip, pip, oag, ocnt, nag, ncnt)
-#define TRACE_ORPHAN(mp, ip, ag) \
-		TRACE2(mp, XFS_FSTRM_KTRACE_ORPHAN, ip, ag);
-
-
-#else
-#define TRACE_AG_SCAN(mp, ag, ag2)
-#define TRACE_AG_PICK1(mp, max_ag, maxfree)
-#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag)
-#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2)
-#define TRACE_FREE(mp, ip, pip, ag, cnt)
-#define TRACE_LOOKUP(mp, ip, pip, ag, cnt)
-#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt)
-#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt)
-#define TRACE_ORPHAN(mp, ip, ag)
-#endif
-
-static kmem_zone_t *item_zone;
+struct xfs_fstrm_item {
+	struct xfs_mru_cache_elem	mru;
+	struct xfs_inode		*ip;
+	xfs_agnumber_t			ag; /* AG in use for this directory */
+};
 
-/*
- * Structure for associating a file or a directory with an allocation group.
- * The parent directory pointer is only needed for files, but since there will
- * generally be vastly more files than directories in the cache, using the same
- * data structure simplifies the code with very little memory overhead.
- */
-typedef struct fstrm_item
-{
-	xfs_agnumber_t	ag;	/* AG currently in use for the file/directory. */
-	xfs_inode_t	*ip;	/* inode self-pointer. */
-	xfs_inode_t	*pip;	/* Parent directory inode pointer. */
-} fstrm_item_t;
+enum xfs_fstrm_alloc {
+	XFS_PICK_USERDATA = 1,
+	XFS_PICK_LOWSPACE = 2,
+};
 
 /*
  * Allocation group filestream associations are tracked with per-ag atomic
- * counters.  These counters allow _xfs_filestream_pick_ag() to tell whether a
+ * counters.  These counters allow xfs_filestream_pick_ag() to tell whether a
  * particular AG already has active filestreams associated with it. The mount
  * point's m_peraglock is used to protect these counters from per-ag array
  * re-allocation during a growfs operation.  When xfs_growfs_data_private() is
@@ -160,7 +81,7 @@ typedef struct fstrm_item
  * the cache that reference per-ag array elements that have since been
  * reallocated.
  */
-static int
+int
 xfs_filestream_peek_ag(
 	xfs_mount_t	*mp,
 	xfs_agnumber_t	agno)
@@ -200,23 +121,40 @@ xfs_filestream_put_ag(
 	xfs_perag_put(pag);
 }
 
+static void
+xfs_fstrm_free_func(
+	struct xfs_mru_cache_elem *mru)
+{
+	struct xfs_fstrm_item	*item =
+		container_of(mru, struct xfs_fstrm_item, mru);
+
+	xfs_filestream_put_ag(item->ip->i_mount, item->ag);
+
+	trace_xfs_filestream_free(item->ip, item->ag);
+
+	kmem_free(item);
+}
+
 /*
  * Scan the AGs starting at startag looking for an AG that isn't in use and has
  * at least minlen blocks free.
  */
 static int
-_xfs_filestream_pick_ag(
-	xfs_mount_t	*mp,
-	xfs_agnumber_t	startag,
-	xfs_agnumber_t	*agp,
-	int		flags,
-	xfs_extlen_t	minlen)
+xfs_filestream_pick_ag(
+	struct xfs_inode	*ip,
+	xfs_agnumber_t		startag,
+	xfs_agnumber_t		*agp,
+	int			flags,
+	xfs_extlen_t		minlen)
 {
-	int		streams, max_streams;
-	int		err, trylock, nscan;
-	xfs_extlen_t	longest, free, minfree, maxfree = 0;
-	xfs_agnumber_t	ag, max_ag = NULLAGNUMBER;
-	struct xfs_perag *pag;
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_fstrm_item	*item;
+	struct xfs_perag	*pag;
+	xfs_extlen_t		longest, free = 0, minfree, maxfree = 0;
+	xfs_agnumber_t		ag, max_ag = NULLAGNUMBER;
+	int			err, trylock, nscan;
+
+	ASSERT(S_ISDIR(ip->i_d.di_mode));
 
 	/* 2% of an AG's blocks must be free for it to be chosen. */
 	minfree = mp->m_sb.sb_agblocks / 50;
@@ -228,8 +166,9 @@ _xfs_filestream_pick_ag(
 	trylock = XFS_ALLOC_FLAG_TRYLOCK;
 
 	for (nscan = 0; 1; nscan++) {
+		trace_xfs_filestream_scan(ip, ag);
+
 		pag = xfs_perag_get(mp, ag);
-		TRACE_AG_SCAN(mp, ag, atomic_read(&pag->pagf_fstrms));
 
 		if (!pag->pagf_init) {
 			err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
@@ -246,7 +185,6 @@ _xfs_filestream_pick_ag(
 		/* Keep track of the AG with the most free blocks. */
 		if (pag->pagf_freeblks > maxfree) {
 			maxfree = pag->pagf_freeblks;
-			max_streams = atomic_read(&pag->pagf_fstrms);
 			max_ag = ag;
 		}
 
@@ -269,7 +207,6 @@ _xfs_filestream_pick_ag(
 
 			/* Break out, retaining the reference on the AG. */
 			free = pag->pagf_freeblks;
-			streams = atomic_read(&pag->pagf_fstrms);
 			xfs_perag_put(pag);
 			*agp = ag;
 			break;
@@ -305,317 +242,98 @@ next_ag:
 		 */
 		if (max_ag != NULLAGNUMBER) {
 			xfs_filestream_get_ag(mp, max_ag);
-			TRACE_AG_PICK1(mp, max_ag, maxfree);
-			streams = max_streams;
 			free = maxfree;
 			*agp = max_ag;
 			break;
 		}
 
 		/* take AG 0 if none matched */
-		TRACE_AG_PICK1(mp, max_ag, maxfree);
+		trace_xfs_filestream_pick(ip, *agp, free, nscan);
 		*agp = 0;
 		return 0;
 	}
 
-	TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags);
-
-	return 0;
-}
+	trace_xfs_filestream_pick(ip, *agp, free, nscan);
 
-/*
- * Set the allocation group number for a file or a directory, updating inode
- * references and per-AG references as appropriate.
- */
-static int
-_xfs_filestream_update_ag(
-	xfs_inode_t	*ip,
-	xfs_inode_t	*pip,
-	xfs_agnumber_t	ag)
-{
-	int		err = 0;
-	xfs_mount_t	*mp;
-	xfs_mru_cache_t	*cache;
-	fstrm_item_t	*item;
-	xfs_agnumber_t	old_ag;
-	xfs_inode_t	*old_pip;
-
-	/*
-	 * Either ip is a regular file and pip is a directory, or ip is a
-	 * directory and pip is NULL.
-	 */
-	ASSERT(ip && ((S_ISREG(ip->i_d.di_mode) && pip &&
-	               S_ISDIR(pip->i_d.di_mode)) ||
-	              (S_ISDIR(ip->i_d.di_mode) && !pip)));
-
-	mp = ip->i_mount;
-	cache = mp->m_filestream;
-
-	item = xfs_mru_cache_lookup(cache, ip->i_ino);
-	if (item) {
-		ASSERT(item->ip == ip);
-		old_ag = item->ag;
-		item->ag = ag;
-		old_pip = item->pip;
-		item->pip = pip;
-		xfs_mru_cache_done(cache);
-
-		/*
-		 * If the AG has changed, drop the old ref and take a new one,
-		 * effectively transferring the reference from old to new AG.
-		 */
-		if (ag != old_ag) {
-			xfs_filestream_put_ag(mp, old_ag);
-			xfs_filestream_get_ag(mp, ag);
-		}
-
-		/*
-		 * If ip is a file and its pip has changed, drop the old ref and
-		 * take a new one.
-		 */
-		if (pip && pip != old_pip) {
-			IRELE(old_pip);
-			IHOLD(pip);
-		}
-
-		TRACE_UPDATE(mp, ip, old_ag, xfs_filestream_peek_ag(mp, old_ag),
-				ag, xfs_filestream_peek_ag(mp, ag));
+	if (*agp == NULLAGNUMBER)
 		return 0;
-	}
 
-	item = kmem_zone_zalloc(item_zone, KM_MAYFAIL);
+	err = ENOMEM;
+	item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
 	if (!item)
-		return ENOMEM;
+		goto out_put_ag;
 
-	item->ag = ag;
+	item->ag = *agp;
 	item->ip = ip;
-	item->pip = pip;
 
-	err = xfs_mru_cache_insert(cache, ip->i_ino, item);
+	err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
 	if (err) {
-		kmem_zone_free(item_zone, item);
-		return err;
+		if (err == EEXIST)
+			err = 0;
+		goto out_free_item;
 	}
 
-	/* Take a reference on the AG. */
-	xfs_filestream_get_ag(mp, ag);
-
-	/*
-	 * Take a reference on the inode itself regardless of whether it's a
-	 * regular file or a directory.
-	 */
-	IHOLD(ip);
-
-	/*
-	 * In the case of a regular file, take a reference on the parent inode
-	 * as well to ensure it remains in-core.
-	 */
-	if (pip)
-		IHOLD(pip);
-
-	TRACE_UPDATE(mp, ip, ag, xfs_filestream_peek_ag(mp, ag),
-			ag, xfs_filestream_peek_ag(mp, ag));
-
 	return 0;
-}
-
-/* xfs_fstrm_free_func(): callback for freeing cached stream items. */
-STATIC void
-xfs_fstrm_free_func(
-	unsigned long	ino,
-	void		*data)
-{
-	fstrm_item_t	*item  = (fstrm_item_t *)data;
-	xfs_inode_t	*ip = item->ip;
-
-	ASSERT(ip->i_ino == ino);
-
-	xfs_iflags_clear(ip, XFS_IFILESTREAM);
-
-	/* Drop the reference taken on the AG when the item was added. */
-	xfs_filestream_put_ag(ip->i_mount, item->ag);
-
-	TRACE_FREE(ip->i_mount, ip, item->pip, item->ag,
-		xfs_filestream_peek_ag(ip->i_mount, item->ag));
-
-	/*
-	 * _xfs_filestream_update_ag() always takes a reference on the inode
-	 * itself, whether it's a file or a directory.  Release it here.
-	 * This can result in the inode being freed and so we must
-	 * not hold any inode locks when freeing filesstreams objects
-	 * otherwise we can deadlock here.
-	 */
-	IRELE(ip);
-
-	/*
-	 * In the case of a regular file, _xfs_filestream_update_ag() also
-	 * takes a ref on the parent inode to keep it in-core.  Release that
-	 * too.
-	 */
-	if (item->pip)
-		IRELE(item->pip);
-
-	/* Finally, free the memory allocated for the item. */
-	kmem_zone_free(item_zone, item);
-}
-
-/*
- * xfs_filestream_init() is called at xfs initialisation time to set up the
- * memory zone that will be used for filestream data structure allocation.
- */
-int
-xfs_filestream_init(void)
-{
-	item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
-	if (!item_zone)
-		return -ENOMEM;
-
-	return 0;
-}
-
-/*
- * xfs_filestream_uninit() is called at xfs termination time to destroy the
- * memory zone that was used for filestream data structure allocation.
- */
-void
-xfs_filestream_uninit(void)
-{
-	kmem_zone_destroy(item_zone);
-}
-
-/*
- * xfs_filestream_mount() is called when a file system is mounted with the
- * filestream option.  It is responsible for allocating the data structures
- * needed to track the new file system's file streams.
- */
-int
-xfs_filestream_mount(
-	xfs_mount_t	*mp)
-{
-	int		err;
-	unsigned int	lifetime, grp_count;
-
-	/*
-	 * The filestream timer tunable is currently fixed within the range of
-	 * one second to four minutes, with five seconds being the default.  The
-	 * group count is somewhat arbitrary, but it'd be nice to adhere to the
-	 * timer tunable to within about 10 percent.  This requires at least 10
-	 * groups.
-	 */
-	lifetime  = xfs_fstrm_centisecs * 10;
-	grp_count = 10;
-
-	err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count,
-	                     xfs_fstrm_free_func);
 
+out_free_item:
+	kmem_free(item);
+out_put_ag:
+	xfs_filestream_put_ag(mp, *agp);
 	return err;
 }
 
-/*
- * xfs_filestream_unmount() is called when a file system that was mounted with
- * the filestream option is unmounted.  It drains the data structures created
- * to track the file system's file streams and frees all the memory that was
- * allocated.
- */
-void
-xfs_filestream_unmount(
-	xfs_mount_t	*mp)
+static struct xfs_inode *
+xfs_filestream_get_parent(
+	struct xfs_inode	*ip)
 {
-	xfs_mru_cache_destroy(mp->m_filestream);
-}
+	struct inode		*inode = VFS_I(ip), *dir = NULL;
+	struct dentry		*dentry, *parent;
 
-/*
- * Return the AG of the filestream the file or directory belongs to, or
- * NULLAGNUMBER otherwise.
- */
-xfs_agnumber_t
-xfs_filestream_lookup_ag(
-	xfs_inode_t	*ip)
-{
-	xfs_mru_cache_t	*cache;
-	fstrm_item_t	*item;
-	xfs_agnumber_t	ag;
-	int		ref;
-
-	if (!S_ISREG(ip->i_d.di_mode) && !S_ISDIR(ip->i_d.di_mode)) {
-		ASSERT(0);
-		return NULLAGNUMBER;
-	}
+	dentry = d_find_alias(inode);
+	if (!dentry)
+		goto out;
 
-	cache = ip->i_mount->m_filestream;
-	item = xfs_mru_cache_lookup(cache, ip->i_ino);
-	if (!item) {
-		TRACE_LOOKUP(ip->i_mount, ip, NULL, NULLAGNUMBER, 0);
-		return NULLAGNUMBER;
-	}
+	parent = dget_parent(dentry);
+	if (!parent)
+		goto out_dput;
 
-	ASSERT(ip == item->ip);
-	ag = item->ag;
-	ref = xfs_filestream_peek_ag(ip->i_mount, ag);
-	xfs_mru_cache_done(cache);
+	dir = igrab(parent->d_inode);
+	dput(parent);
 
-	TRACE_LOOKUP(ip->i_mount, ip, item->pip, ag, ref);
-	return ag;
+out_dput:
+	dput(dentry);
+out:
+	return dir ? XFS_I(dir) : NULL;
 }
 
 /*
- * xfs_filestream_associate() should only be called to associate a regular file
- * with its parent directory.  Calling it with a child directory isn't
- * appropriate because filestreams don't apply to entire directory hierarchies.
- * Creating a file in a child directory of an existing filestream directory
- * starts a new filestream with its own allocation group association.
+ * Find the right allocation group for a file, either by finding an
+ * existing file stream or creating a new one.
  *
- * Returns < 0 on error, 0 if successful association occurred, > 0 if
- * we failed to get an association because of locking issues.
+ * Returns NULLAGNUMBER in case of an error.
  */
-int
-xfs_filestream_associate(
-	xfs_inode_t	*pip,
-	xfs_inode_t	*ip)
+xfs_agnumber_t
+xfs_filestream_lookup_ag(
+	struct xfs_inode	*ip)
 {
-	xfs_mount_t	*mp;
-	xfs_mru_cache_t	*cache;
-	fstrm_item_t	*item;
-	xfs_agnumber_t	ag, rotorstep, startag;
-	int		err = 0;
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_inode	*pip = NULL;
+	xfs_agnumber_t		startag, ag = NULLAGNUMBER;
+	struct xfs_mru_cache_elem *mru;
 
-	ASSERT(S_ISDIR(pip->i_d.di_mode));
 	ASSERT(S_ISREG(ip->i_d.di_mode));
-	if (!S_ISDIR(pip->i_d.di_mode) || !S_ISREG(ip->i_d.di_mode))
-		return -EINVAL;
 
-	mp = pip->i_mount;
-	cache = mp->m_filestream;
+	pip = xfs_filestream_get_parent(ip);
+	if (!pip)
+		goto out;
 
-	/*
-	 * We have a problem, Houston.
-	 *
-	 * Taking the iolock here violates inode locking order - we already
-	 * hold the ilock. Hence if we block getting this lock we may never
-	 * wake. Unfortunately, that means if we can't get the lock, we're
-	 * screwed in terms of getting a stream association - we can't spin
-	 * waiting for the lock because someone else is waiting on the lock we
-	 * hold and we cannot drop that as we are in a transaction here.
-	 *
-	 * Lucky for us, this inversion is not a problem because it's a
-	 * directory inode that we are trying to lock here.
-	 *
-	 * So, if we can't get the iolock without sleeping then just give up
-	 */
-	if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL))
-		return 1;
-
-	/* If the parent directory is already in the cache, use its AG. */
-	item = xfs_mru_cache_lookup(cache, pip->i_ino);
-	if (item) {
-		ASSERT(item->ip == pip);
-		ag = item->ag;
-		xfs_mru_cache_done(cache);
-
-		TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag));
-		err = _xfs_filestream_update_ag(ip, pip, ag);
+	mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
+	if (mru) {
+		ag = container_of(mru, struct xfs_fstrm_item, mru)->ag;
+		xfs_mru_cache_done(mp->m_filestream);
 
-		goto exit;
+		trace_xfs_filestream_lookup(ip, ag);
+		goto out;
 	}
 
 	/*
@@ -623,202 +341,94 @@ xfs_filestream_associate(
 	 * use the directory inode's AG.
 	 */
 	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
-		rotorstep = xfs_rotorstep;
+		xfs_agnumber_t	 rotorstep = xfs_rotorstep;
 		startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
 		mp->m_agfrotor = (mp->m_agfrotor + 1) %
 		                 (mp->m_sb.sb_agcount * rotorstep);
 	} else
 		startag = XFS_INO_TO_AGNO(mp, pip->i_ino);
 
-	/* Pick a new AG for the parent inode starting at startag. */
-	err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0);
-	if (err || ag == NULLAGNUMBER)
-		goto exit_did_pick;
-
-	/* Associate the parent inode with the AG. */
-	err = _xfs_filestream_update_ag(pip, NULL, ag);
-	if (err)
-		goto exit_did_pick;
-
-	/* Associate the file inode with the AG. */
-	err = _xfs_filestream_update_ag(ip, pip, ag);
-	if (err)
-		goto exit_did_pick;
-
-	TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag));
-
-exit_did_pick:
-	/*
-	 * If _xfs_filestream_pick_ag() returned a valid AG, remove the
-	 * reference it took on it, since the file and directory will have taken
-	 * their own now if they were successfully cached.
-	 */
-	if (ag != NULLAGNUMBER)
-		xfs_filestream_put_ag(mp, ag);
-
-exit:
-	xfs_iunlock(pip, XFS_IOLOCK_EXCL);
-	return -err;
+	if (xfs_filestream_pick_ag(pip, startag, &ag, 0, 0))
+		ag = NULLAGNUMBER;
+out:
+	IRELE(pip);
+	return ag;
 }
 
 /*
- * Pick a new allocation group for the current file and its file stream.  This
- * function is called by xfs_bmap_filestreams() with the mount point's per-ag
- * lock held.
+ * Pick a new allocation group for the current file and its file stream.
+ *
+ * This is called when the allocator can't find a suitable extent in the
+ * current AG, and we have to move the stream into a new AG with more space.
  */
 int
 xfs_filestream_new_ag(
 	struct xfs_bmalloca	*ap,
 	xfs_agnumber_t		*agp)
 {
-	int		flags, err;
-	xfs_inode_t	*ip, *pip = NULL;
-	xfs_mount_t	*mp;
-	xfs_mru_cache_t	*cache;
-	xfs_extlen_t	minlen;
-	fstrm_item_t	*dir, *file;
-	xfs_agnumber_t	ag = NULLAGNUMBER;
-
-	ip = ap->ip;
-	mp = ip->i_mount;
-	cache = mp->m_filestream;
-	minlen = ap->length;
-	*agp = NULLAGNUMBER;
+	struct xfs_inode	*ip = ap->ip, *pip;
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_extlen_t		minlen = ap->length;
+	xfs_agnumber_t		startag = 0;
+	int			flags, err = 0;
+	struct xfs_mru_cache_elem *mru;
 
-	/*
-	 * Look for the file in the cache, removing it if it's found.  Doing
-	 * this allows it to be held across the dir lookup that follows.
-	 */
-	file = xfs_mru_cache_remove(cache, ip->i_ino);
-	if (file) {
-		ASSERT(ip == file->ip);
-
-		/* Save the file's parent inode and old AG number for later. */
-		pip = file->pip;
-		ag = file->ag;
-
-		/* Look for the file's directory in the cache. */
-		dir = xfs_mru_cache_lookup(cache, pip->i_ino);
-		if (dir) {
-			ASSERT(pip == dir->ip);
-
-			/*
-			 * If the directory has already moved on to a new AG,
-			 * use that AG as the new AG for the file. Don't
-			 * forget to twiddle the AG refcounts to match the
-			 * movement.
-			 */
-			if (dir->ag != file->ag) {
-				xfs_filestream_put_ag(mp, file->ag);
-				xfs_filestream_get_ag(mp, dir->ag);
-				*agp = file->ag = dir->ag;
-			}
-
-			xfs_mru_cache_done(cache);
-		}
+	*agp = NULLAGNUMBER;
 
-		/*
-		 * Put the file back in the cache.  If this fails, the free
-		 * function needs to be called to tidy up in the same way as if
-		 * the item had simply expired from the cache.
-		 */
-		err = xfs_mru_cache_insert(cache, ip->i_ino, file);
-		if (err) {
-			xfs_fstrm_free_func(ip->i_ino, file);
-			return err;
-		}
+	pip = xfs_filestream_get_parent(ip);
+	if (!pip)
+		goto exit;
 
-		/*
-		 * If the file's AG was moved to the directory's new AG, there's
-		 * nothing more to be done.
-		 */
-		if (*agp != NULLAGNUMBER) {
-			TRACE_MOVEAG(mp, ip, pip,
-					ag, xfs_filestream_peek_ag(mp, ag),
-					*agp, xfs_filestream_peek_ag(mp, *agp));
-			return 0;
-		}
+	mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino);
+	if (mru) {
+		struct xfs_fstrm_item *item =
+			container_of(mru, struct xfs_fstrm_item, mru);
+		startag = (item->ag + 1) % mp->m_sb.sb_agcount;
 	}
 
-	/*
-	 * If the file's parent directory is known, take its iolock in exclusive
-	 * mode to prevent two sibling files from racing each other to migrate
-	 * themselves and their parent to different AGs.
-	 *
-	 * Note that we lock the parent directory iolock inside the child
-	 * iolock here.  That's fine as we never hold both parent and child
-	 * iolock in any other place.  This is different from the ilock,
-	 * which requires locking of the child after the parent for namespace
-	 * operations.
-	 */
-	if (pip)
-		xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
-
-	/*
-	 * A new AG needs to be found for the file.  If the file's parent
-	 * directory is also known, it will be moved to the new AG as well to
-	 * ensure that files created inside it in future use the new AG.
-	 */
-	ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount;
 	flags = (ap->userdata ? XFS_PICK_USERDATA : 0) |
 	        (ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0);
 
-	err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen);
-	if (err || *agp == NULLAGNUMBER)
-		goto exit;
+	err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen);
 
 	/*
-	 * If the file wasn't found in the file cache, then its parent directory
-	 * inode isn't known.  For this to have happened, the file must either
-	 * be pre-existing, or it was created long enough ago that its cache
-	 * entry has expired.  This isn't the sort of usage that the filestreams
-	 * allocator is trying to optimise, so there's no point trying to track
-	 * its new AG somehow in the filestream data structures.
+	 * Only free the item here so we skip over the old AG earlier.
 	 */
-	if (!pip) {
-		TRACE_ORPHAN(mp, ip, *agp);
-		goto exit;
-	}
-
-	/* Associate the parent inode with the AG. */
-	err = _xfs_filestream_update_ag(pip, NULL, *agp);
-	if (err)
-		goto exit;
-
-	/* Associate the file inode with the AG. */
-	err = _xfs_filestream_update_ag(ip, pip, *agp);
-	if (err)
-		goto exit;
-
-	TRACE_MOVEAG(mp, ip, pip, NULLAGNUMBER, 0,
-			*agp, xfs_filestream_peek_ag(mp, *agp));
+	if (mru)
+		xfs_fstrm_free_func(mru);
 
+	IRELE(pip);
 exit:
-	/*
-	 * If _xfs_filestream_pick_ag() returned a valid AG, remove the
-	 * reference it took on it, since the file and directory will have taken
-	 * their own now if they were successfully cached.
-	 */
-	if (*agp != NULLAGNUMBER)
-		xfs_filestream_put_ag(mp, *agp);
-	else
+	if (*agp == NULLAGNUMBER)
 		*agp = 0;
-
-	if (pip)
-		xfs_iunlock(pip, XFS_IOLOCK_EXCL);
-
 	return err;
 }
 
-/*
- * Remove an association between an inode and a filestream object.
- * Typically this is done on last close of an unlinked file.
- */
 void
 xfs_filestream_deassociate(
-	xfs_inode_t	*ip)
+	struct xfs_inode	*ip)
 {
-	xfs_mru_cache_t	*cache = ip->i_mount->m_filestream;
+	xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino);
+}
+
+int
+xfs_filestream_mount(
+	xfs_mount_t	*mp)
+{
+	/*
+	 * The filestream timer tunable is currently fixed within the range of
+	 * one second to four minutes, with five seconds being the default.  The
+	 * group count is somewhat arbitrary, but it'd be nice to adhere to the
+	 * timer tunable to within about 10 percent.  This requires at least 10
+	 * groups.
+	 */
+	return xfs_mru_cache_create(&mp->m_filestream, xfs_fstrm_centisecs * 10,
+				    10, xfs_fstrm_free_func);
+}
 
-	xfs_mru_cache_delete(cache, ip->i_ino);
+void
+xfs_filestream_unmount(
+	xfs_mount_t	*mp)
+{
+	xfs_mru_cache_destroy(mp->m_filestream);
 }
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
index 6d61dbe..2ef4340 100644
--- a/fs/xfs/xfs_filestream.h
+++ b/fs/xfs/xfs_filestream.h
@@ -20,50 +20,20 @@
 
 struct xfs_mount;
 struct xfs_inode;
-struct xfs_perag;
 struct xfs_bmalloca;
 
-#ifdef XFS_FILESTREAMS_TRACE
-#define XFS_FSTRM_KTRACE_INFO		1
-#define XFS_FSTRM_KTRACE_AGSCAN		2
-#define XFS_FSTRM_KTRACE_AGPICK1	3
-#define XFS_FSTRM_KTRACE_AGPICK2	4
-#define XFS_FSTRM_KTRACE_UPDATE		5
-#define XFS_FSTRM_KTRACE_FREE		6
-#define	XFS_FSTRM_KTRACE_ITEM_LOOKUP	7
-#define	XFS_FSTRM_KTRACE_ASSOCIATE	8
-#define	XFS_FSTRM_KTRACE_MOVEAG		9
-#define	XFS_FSTRM_KTRACE_ORPHAN		10
-
-#define XFS_FSTRM_KTRACE_SIZE	16384
-extern ktrace_t *xfs_filestreams_trace_buf;
-
-#endif
-
-/* allocation selection flags */
-typedef enum xfs_fstrm_alloc {
-	XFS_PICK_USERDATA = 1,
-	XFS_PICK_LOWSPACE = 2,
-} xfs_fstrm_alloc_t;
-
-/* prototypes for filestream.c */
-int xfs_filestream_init(void);
-void xfs_filestream_uninit(void);
 int xfs_filestream_mount(struct xfs_mount *mp);
 void xfs_filestream_unmount(struct xfs_mount *mp);
-xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);
-int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip);
 void xfs_filestream_deassociate(struct xfs_inode *ip);
+xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);
 int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp);
+int xfs_filestream_peek_ag(struct xfs_mount *mp, xfs_agnumber_t agno);
 
-
-/* filestreams for the inode? */
 static inline int
 xfs_inode_is_filestream(
 	struct xfs_inode	*ip)
 {
 	return (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS) ||
-		xfs_iflags_test(ip, XFS_IFILESTREAM) ||
 		(ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM);
 }
 
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 3789c6f..726f83a 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -87,7 +87,6 @@ xfs_inobt_alloc_block(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_ptr	*start,
 	union xfs_btree_ptr	*new,
-	int			length,
 	int			*stat)
 {
 	xfs_alloc_arg_t		args;		/* block allocation args */
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 98d3524..c48df5f 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -507,8 +507,7 @@ STATIC int
 xfs_inode_ag_walk(
 	struct xfs_mount	*mp,
 	struct xfs_perag	*pag,
-	int			(*execute)(struct xfs_inode *ip,
-					   struct xfs_perag *pag, int flags,
+	int			(*execute)(struct xfs_inode *ip, int flags,
 					   void *args),
 	int			flags,
 	void			*args,
@@ -582,7 +581,7 @@ restart:
 		for (i = 0; i < nr_found; i++) {
 			if (!batch[i])
 				continue;
-			error = execute(batch[i], pag, flags, args);
+			error = execute(batch[i], flags, args);
 			IRELE(batch[i]);
 			if (error == EAGAIN) {
 				skipped++;
@@ -636,8 +635,7 @@ xfs_eofblocks_worker(
 int
 xfs_inode_ag_iterator(
 	struct xfs_mount	*mp,
-	int			(*execute)(struct xfs_inode *ip,
-					   struct xfs_perag *pag, int flags,
+	int			(*execute)(struct xfs_inode *ip, int flags,
 					   void *args),
 	int			flags,
 	void			*args)
@@ -664,8 +662,7 @@ xfs_inode_ag_iterator(
 int
 xfs_inode_ag_iterator_tag(
 	struct xfs_mount	*mp,
-	int			(*execute)(struct xfs_inode *ip,
-					   struct xfs_perag *pag, int flags,
+	int			(*execute)(struct xfs_inode *ip, int flags,
 					   void *args),
 	int			flags,
 	void			*args,
@@ -1209,7 +1206,6 @@ xfs_inode_match_id(
 STATIC int
 xfs_inode_free_eofblocks(
 	struct xfs_inode	*ip,
-	struct xfs_perag	*pag,
 	int			flags,
 	void			*args)
 {
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 9ed68bb..9cf017b 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -60,12 +60,10 @@ int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *);
 void xfs_eofblocks_worker(struct work_struct *);
 
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
-	int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag,
-		int flags, void *args),
+	int (*execute)(struct xfs_inode *ip, int flags, void *args),
 	int flags, void *args);
 int xfs_inode_ag_iterator_tag(struct xfs_mount *mp,
-	int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag,
-		int flags, void *args),
+	int (*execute)(struct xfs_inode *ip, int flags, void *args),
 	int flags, void *args, int tag);
 
 static inline int
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a9079c1..6d6b44a 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -655,7 +655,6 @@ xfs_ialloc(
 	uint		flags;
 	int		error;
 	timespec_t	tv;
-	int		filestreams = 0;
 
 	/*
 	 * Call the space management code to pick
@@ -772,13 +771,6 @@ xfs_ialloc(
 		flags |= XFS_ILOG_DEV;
 		break;
 	case S_IFREG:
-		/*
-		 * we can't set up filestreams until after the VFS inode
-		 * is set up properly.
-		 */
-		if (pip && xfs_inode_is_filestream(pip))
-			filestreams = 1;
-		/* fall through */
 	case S_IFDIR:
 		if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
 			uint	di_flags = 0;
@@ -844,15 +836,6 @@ xfs_ialloc(
 	/* now that we have an i_mode we can setup inode ops and unlock */
 	xfs_setup_inode(ip);
 
-	/* now we have set up the vfs inode we can associate the filestream */
-	if (filestreams) {
-		error = xfs_filestream_associate(pip, ip);
-		if (error < 0)
-			return -error;
-		if (!error)
-			xfs_iflags_set(ip, XFS_IFILESTREAM);
-	}
-
 	*ipp = ip;
 	return 0;
 }
@@ -1334,7 +1317,8 @@ int
 xfs_create_tmpfile(
 	struct xfs_inode	*dp,
 	struct dentry		*dentry,
-	umode_t			mode)
+	umode_t			mode,
+	struct xfs_inode	**ipp)
 {
 	struct xfs_mount	*mp = dp->i_mount;
 	struct xfs_inode	*ip = NULL;
@@ -1402,7 +1386,6 @@ xfs_create_tmpfile(
 	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
 
 	ip->i_d.di_nlink--;
-	d_tmpfile(dentry, VFS_I(ip));
 	error = xfs_iunlink(tp, ip);
 	if (error)
 		goto out_trans_abort;
@@ -1415,6 +1398,7 @@ xfs_create_tmpfile(
 	xfs_qm_dqrele(gdqp);
 	xfs_qm_dqrele(pdqp);
 
+	*ipp = ip;
 	return 0;
 
  out_trans_abort:
@@ -1698,16 +1682,6 @@ xfs_release(
 		int truncated;
 
 		/*
-		 * If we are using filestreams, and we have an unlinked
-		 * file that we are processing the last close on, then nothing
-		 * will be able to reopen and write to this file. Purge this
-		 * inode from the filestreams cache so that it doesn't delay
-		 * teardown of the inode.
-		 */
-		if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
-			xfs_filestream_deassociate(ip);
-
-		/*
 		 * If we previously truncated this file and removed old data
 		 * in the process, we want to initiate "early" writeout on
 		 * the last close.  This is an attempt to combat the notorious
@@ -2687,13 +2661,7 @@ xfs_remove(
 	if (error)
 		goto std_return;
 
-	/*
-	 * If we are using filestreams, kill the stream association.
-	 * If the file is still open it may get a new one but that
-	 * will get killed on last close in xfs_close() so we don't
-	 * have to worry about that.
-	 */
-	if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
+	if (is_dir && xfs_inode_is_filestream(ip))
 		xfs_filestream_deassociate(ip);
 
 	return 0;
@@ -3395,9 +3363,9 @@ xfs_iflush_int(
 		}
 	}
 
-	xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp);
+	xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
 	if (XFS_IFORK_Q(ip))
-		xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
+		xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
 	xfs_inobp_check(mp, bp);
 
 	/*
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 396cc1f..13aea54 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -209,7 +209,6 @@ xfs_get_initial_prid(struct xfs_inode *dp)
 #define XFS_ISTALE		(1 << 1) /* inode has been staled */
 #define XFS_IRECLAIMABLE	(1 << 2) /* inode can be reclaimed */
 #define XFS_INEW		(1 << 3) /* inode has just been allocated */
-#define XFS_IFILESTREAM		(1 << 4) /* inode is in a filestream dir. */
 #define XFS_ITRUNCATED		(1 << 5) /* truncated down so flush-on-close */
 #define XFS_IDIRTY_RELEASE	(1 << 6) /* dirty release already seen */
 #define __XFS_IFLOCK_BIT	7	 /* inode is being flushed right now */
@@ -225,8 +224,7 @@ xfs_get_initial_prid(struct xfs_inode *dp)
  */
 #define XFS_IRECLAIM_RESET_FLAGS	\
 	(XFS_IRECLAIMABLE | XFS_IRECLAIM | \
-	 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \
-	 XFS_IFILESTREAM);
+	 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED)
 
 /*
  * Synchronize processes attempting to flush the in-core inode back to disk.
@@ -334,7 +332,7 @@ int		xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
 int		xfs_create(struct xfs_inode *dp, struct xfs_name *name,
 			   umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp);
 int		xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry,
-			   umode_t mode);
+			   umode_t mode, struct xfs_inode **ipp);
 int		xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
 			   struct xfs_inode *ip);
 int		xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/xfs_inode_fork.c
index 73514c0..b031e8d 100644
--- a/fs/xfs/xfs_inode_fork.c
+++ b/fs/xfs/xfs_inode_fork.c
@@ -798,8 +798,7 @@ xfs_iflush_fork(
 	xfs_inode_t		*ip,
 	xfs_dinode_t		*dip,
 	xfs_inode_log_item_t	*iip,
-	int			whichfork,
-	xfs_buf_t		*bp)
+	int			whichfork)
 {
 	char			*cp;
 	xfs_ifork_t		*ifp;
diff --git a/fs/xfs/xfs_inode_fork.h b/fs/xfs/xfs_inode_fork.h
index eb329a1..7d3b1ed 100644
--- a/fs/xfs/xfs_inode_fork.h
+++ b/fs/xfs/xfs_inode_fork.h
@@ -127,8 +127,7 @@ typedef struct xfs_ifork {
 
 int		xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
 void		xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
-				struct xfs_inode_log_item *, int,
-				struct xfs_buf *);
+				struct xfs_inode_log_item *, int);
 void		xfs_idestroy_fork(struct xfs_inode *, int);
 void		xfs_idata_realloc(struct xfs_inode *, int, int);
 void		xfs_iroot_realloc(struct xfs_inode *, int, int);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 3b80eba..6c5eb4c 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -730,7 +730,7 @@ xfs_iomap_write_allocate(
 			 */
 			nimaps = 1;
 			end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
-			error = xfs_bmap_last_offset(NULL, ip, &last_block,
+			error = xfs_bmap_last_offset(ip, &last_block,
 							XFS_DATA_FORK);
 			if (error)
 				goto trans_cancel;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 89b07e4..36d6303 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -72,8 +72,8 @@ xfs_initxattrs(
 	int			error = 0;
 
 	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
-		error = xfs_attr_set(ip, xattr->name, xattr->value,
-				     xattr->value_len, ATTR_SECURE);
+		error = -xfs_attr_set(ip, xattr->name, xattr->value,
+				      xattr->value_len, ATTR_SECURE);
 		if (error < 0)
 			break;
 	}
@@ -93,8 +93,8 @@ xfs_init_security(
 	struct inode	*dir,
 	const struct qstr *qstr)
 {
-	return security_inode_init_security(inode, dir, qstr,
-					    &xfs_initxattrs, NULL);
+	return -security_inode_init_security(inode, dir, qstr,
+					     &xfs_initxattrs, NULL);
 }
 
 static void
@@ -124,15 +124,15 @@ xfs_cleanup_inode(
 	xfs_dentry_to_name(&teardown, dentry, 0);
 
 	xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
-	iput(inode);
 }
 
 STATIC int
-xfs_vn_mknod(
+xfs_generic_create(
 	struct inode	*dir,
 	struct dentry	*dentry,
 	umode_t		mode,
-	dev_t		rdev)
+	dev_t		rdev,
+	bool		tmpfile)	/* unnamed file */
 {
 	struct inode	*inode;
 	struct xfs_inode *ip = NULL;
@@ -156,8 +156,12 @@ xfs_vn_mknod(
 	if (error)
 		return error;
 
-	xfs_dentry_to_name(&name, dentry, mode);
-	error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
+	if (!tmpfile) {
+		xfs_dentry_to_name(&name, dentry, mode);
+		error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
+	} else {
+		error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip);
+	}
 	if (unlikely(error))
 		goto out_free_acl;
 
@@ -169,18 +173,22 @@ xfs_vn_mknod(
 
 #ifdef CONFIG_XFS_POSIX_ACL
 	if (default_acl) {
-		error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
+		error = -xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
 		if (error)
 			goto out_cleanup_inode;
 	}
 	if (acl) {
-		error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
+		error = -xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
 		if (error)
 			goto out_cleanup_inode;
 	}
 #endif
 
-	d_instantiate(dentry, inode);
+	if (tmpfile)
+		d_tmpfile(dentry, inode);
+	else
+		d_instantiate(dentry, inode);
+
  out_free_acl:
 	if (default_acl)
 		posix_acl_release(default_acl);
@@ -189,11 +197,23 @@ xfs_vn_mknod(
 	return -error;
 
  out_cleanup_inode:
-	xfs_cleanup_inode(dir, inode, dentry);
+	if (!tmpfile)
+		xfs_cleanup_inode(dir, inode, dentry);
+	iput(inode);
 	goto out_free_acl;
 }
 
 STATIC int
+xfs_vn_mknod(
+	struct inode	*dir,
+	struct dentry	*dentry,
+	umode_t		mode,
+	dev_t		rdev)
+{
+	return xfs_generic_create(dir, dentry, mode, rdev, false);
+}
+
+STATIC int
 xfs_vn_create(
 	struct inode	*dir,
 	struct dentry	*dentry,
@@ -353,6 +373,7 @@ xfs_vn_symlink(
 
  out_cleanup_inode:
 	xfs_cleanup_inode(dir, inode, dentry);
+	iput(inode);
  out:
 	return -error;
 }
@@ -1053,11 +1074,7 @@ xfs_vn_tmpfile(
 	struct dentry	*dentry,
 	umode_t		mode)
 {
-	int		error;
-
-	error = xfs_create_tmpfile(XFS_I(dir), dentry, mode);
-
-	return -error;
+	return xfs_generic_create(dir, dentry, mode, 0, true);
 }
 
 static const struct inode_operations xfs_inode_operations = {
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 8497a00..a5f8bd9 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -616,11 +616,13 @@ xfs_log_mount(
 	int		error = 0;
 	int		min_logfsbs;
 
-	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY))
-		xfs_notice(mp, "Mounting Filesystem");
-	else {
+	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
+		xfs_notice(mp, "Mounting V%d Filesystem",
+			   XFS_SB_VERSION_NUM(&mp->m_sb));
+	} else {
 		xfs_notice(mp,
-"Mounting filesystem in no-recovery mode.  Filesystem will be inconsistent.");
+"Mounting V%d filesystem in no-recovery mode. Filesystem will be inconsistent.",
+			   XFS_SB_VERSION_NUM(&mp->m_sb));
 		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
 	}
 
@@ -1181,11 +1183,14 @@ xlog_iodone(xfs_buf_t *bp)
 	/* log I/O is always issued ASYNC */
 	ASSERT(XFS_BUF_ISASYNC(bp));
 	xlog_state_done_syncing(iclog, aborted);
+
 	/*
-	 * do not reference the buffer (bp) here as we could race
-	 * with it being freed after writing the unmount record to the
-	 * log.
+	 * drop the buffer lock now that we are done. Nothing references
+	 * the buffer after this, so an unmount waiting on this lock can now
+	 * tear it down safely. As such, it is unsafe to reference the buffer
+	 * (bp) after the unlock as we could race with it being freed.
 	 */
+	xfs_buf_unlock(bp);
 }
 
 /*
@@ -1368,8 +1373,16 @@ xlog_alloc_log(
 	bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0);
 	if (!bp)
 		goto out_free_log;
-	bp->b_iodone = xlog_iodone;
+
+	/*
+	 * The iclogbuf buffer locks are held over IO but we are not going to do
+	 * IO yet.  Hence unlock the buffer so that the log IO path can grab it
+	 * when appropriately.
+	 */
 	ASSERT(xfs_buf_islocked(bp));
+	xfs_buf_unlock(bp);
+
+	bp->b_iodone = xlog_iodone;
 	log->l_xbuf = bp;
 
 	spin_lock_init(&log->l_icloglock);
@@ -1398,6 +1411,9 @@ xlog_alloc_log(
 		if (!bp)
 			goto out_free_iclog;
 
+		ASSERT(xfs_buf_islocked(bp));
+		xfs_buf_unlock(bp);
+
 		bp->b_iodone = xlog_iodone;
 		iclog->ic_bp = bp;
 		iclog->ic_data = bp->b_addr;
@@ -1422,7 +1438,6 @@ xlog_alloc_log(
 		iclog->ic_callback_tail = &(iclog->ic_callback);
 		iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
 
-		ASSERT(xfs_buf_islocked(iclog->ic_bp));
 		init_waitqueue_head(&iclog->ic_force_wait);
 		init_waitqueue_head(&iclog->ic_write_wait);
 
@@ -1631,6 +1646,12 @@ xlog_cksum(
  * we transition the iclogs to IOERROR state *after* flushing all existing
  * iclogs to disk. This is because we don't want anymore new transactions to be
  * started or completed afterwards.
+ *
+ * We lock the iclogbufs here so that we can serialise against IO completion
+ * during unmount. We might be processing a shutdown triggered during unmount,
+ * and that can occur asynchronously to the unmount thread, and hence we need to
+ * ensure that completes before tearing down the iclogbufs. Hence we need to
+ * hold the buffer lock across the log IO to acheive that.
  */
 STATIC int
 xlog_bdstrat(
@@ -1638,6 +1659,7 @@ xlog_bdstrat(
 {
 	struct xlog_in_core	*iclog = bp->b_fspriv;
 
+	xfs_buf_lock(bp);
 	if (iclog->ic_state & XLOG_STATE_IOERROR) {
 		xfs_buf_ioerror(bp, EIO);
 		xfs_buf_stale(bp);
@@ -1645,7 +1667,8 @@ xlog_bdstrat(
 		/*
 		 * It would seem logical to return EIO here, but we rely on
 		 * the log state machine to propagate I/O errors instead of
-		 * doing it here.
+		 * doing it here. Similarly, IO completion will unlock the
+		 * buffer, so we don't do it here.
 		 */
 		return 0;
 	}
@@ -1847,14 +1870,28 @@ xlog_dealloc_log(
 	xlog_cil_destroy(log);
 
 	/*
-	 * always need to ensure that the extra buffer does not point to memory
-	 * owned by another log buffer before we free it.
+	 * Cycle all the iclogbuf locks to make sure all log IO completion
+	 * is done before we tear down these buffers.
 	 */
+	iclog = log->l_iclog;
+	for (i = 0; i < log->l_iclog_bufs; i++) {
+		xfs_buf_lock(iclog->ic_bp);
+		xfs_buf_unlock(iclog->ic_bp);
+		iclog = iclog->ic_next;
+	}
+
+	/*
+	 * Always need to ensure that the extra buffer does not point to memory
+	 * owned by another log buffer before we free it. Also, cycle the lock
+	 * first to ensure we've completed IO on it.
+	 */
+	xfs_buf_lock(log->l_xbuf);
+	xfs_buf_unlock(log->l_xbuf);
 	xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size));
 	xfs_buf_free(log->l_xbuf);
 
 	iclog = log->l_iclog;
-	for (i=0; i<log->l_iclog_bufs; i++) {
+	for (i = 0; i < log->l_iclog_bufs; i++) {
 		xfs_buf_free(iclog->ic_bp);
 		next_iclog = iclog->ic_next;
 		kmem_free(iclog);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index f7843df..981af0f 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3147,7 +3147,7 @@ xlog_recover_efd_pass2(
 		}
 		lip = xfs_trans_ail_cursor_next(ailp, &cur);
 	}
-	xfs_trans_ail_cursor_done(ailp, &cur);
+	xfs_trans_ail_cursor_done(&cur);
 	spin_unlock(&ailp->xa_lock);
 
 	return 0;
@@ -3522,8 +3522,7 @@ out:
 
 STATIC int
 xlog_recover_unmount_trans(
-	struct xlog		*log,
-	struct xlog_recover	*trans)
+	struct xlog		*log)
 {
 	/* Do nothing now */
 	xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
@@ -3597,7 +3596,7 @@ xlog_recover_process_data(
 								trans, pass);
 				break;
 			case XLOG_UNMOUNT_TRANS:
-				error = xlog_recover_unmount_trans(log, trans);
+				error = xlog_recover_unmount_trans(log);
 				break;
 			case XLOG_WAS_CONT_TRANS:
 				error = xlog_recover_add_to_cont_trans(log,
@@ -3759,7 +3758,7 @@ xlog_recover_process_efis(
 		lip = xfs_trans_ail_cursor_next(ailp, &cur);
 	}
 out:
-	xfs_trans_ail_cursor_done(ailp, &cur);
+	xfs_trans_ail_cursor_done(&cur);
 	spin_unlock(&ailp->xa_lock);
 	return error;
 }
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 993cb19..944f3d9 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -743,8 +743,6 @@ xfs_mountfs(
 		new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
 		if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
 			mp->m_inode_cluster_size = new_size;
-		xfs_info(mp, "Using inode cluster size of %d bytes",
-			 mp->m_inode_cluster_size);
 	}
 
 	/*
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 4aff563..f99b493 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -100,14 +100,20 @@
  * likely result in a loop in one of the lists.  That's a sure-fire recipe for
  * an infinite loop in the code.
  */
-typedef struct xfs_mru_cache_elem
-{
-	struct list_head list_node;
-	unsigned long	key;
-	void		*value;
-} xfs_mru_cache_elem_t;
+struct xfs_mru_cache {
+	struct radix_tree_root	store;     /* Core storage data structure.  */
+	struct list_head	*lists;    /* Array of lists, one per grp.  */
+	struct list_head	reap_list; /* Elements overdue for reaping. */
+	spinlock_t		lock;      /* Lock to protect this struct.  */
+	unsigned int		grp_count; /* Number of discrete groups.    */
+	unsigned int		grp_time;  /* Time period spanned by grps.  */
+	unsigned int		lru_grp;   /* Group containing time zero.   */
+	unsigned long		time_zero; /* Time first element was added. */
+	xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
+	struct delayed_work	work;      /* Workqueue data for reaping.   */
+	unsigned int		queued;	   /* work has been queued */
+};
 
-static kmem_zone_t		*xfs_mru_elem_zone;
 static struct workqueue_struct	*xfs_mru_reap_wq;
 
 /*
@@ -129,12 +135,12 @@ static struct workqueue_struct	*xfs_mru_reap_wq;
  */
 STATIC unsigned long
 _xfs_mru_cache_migrate(
-	xfs_mru_cache_t	*mru,
-	unsigned long	now)
+	struct xfs_mru_cache	*mru,
+	unsigned long		now)
 {
-	unsigned int	grp;
-	unsigned int	migrated = 0;
-	struct list_head *lru_list;
+	unsigned int		grp;
+	unsigned int		migrated = 0;
+	struct list_head	*lru_list;
 
 	/* Nothing to do if the data store is empty. */
 	if (!mru->time_zero)
@@ -193,11 +199,11 @@ _xfs_mru_cache_migrate(
  */
 STATIC void
 _xfs_mru_cache_list_insert(
-	xfs_mru_cache_t		*mru,
-	xfs_mru_cache_elem_t	*elem)
+	struct xfs_mru_cache	*mru,
+	struct xfs_mru_cache_elem *elem)
 {
-	unsigned int	grp = 0;
-	unsigned long	now = jiffies;
+	unsigned int		grp = 0;
+	unsigned long		now = jiffies;
 
 	/*
 	 * If the data store is empty, initialise time zero, leave grp set to
@@ -231,10 +237,10 @@ _xfs_mru_cache_list_insert(
  */
 STATIC void
 _xfs_mru_cache_clear_reap_list(
-	xfs_mru_cache_t		*mru) __releases(mru->lock) __acquires(mru->lock)
-
+	struct xfs_mru_cache	*mru)
+		__releases(mru->lock) __acquires(mru->lock)
 {
-	xfs_mru_cache_elem_t	*elem, *next;
+	struct xfs_mru_cache_elem *elem, *next;
 	struct list_head	tmp;
 
 	INIT_LIST_HEAD(&tmp);
@@ -252,15 +258,8 @@ _xfs_mru_cache_clear_reap_list(
 	spin_unlock(&mru->lock);
 
 	list_for_each_entry_safe(elem, next, &tmp, list_node) {
-
-		/* Remove the element from the reap list. */
 		list_del_init(&elem->list_node);
-
-		/* Call the client's free function with the key and value pointer. */
-		mru->free_func(elem->key, elem->value);
-
-		/* Free the element structure. */
-		kmem_zone_free(xfs_mru_elem_zone, elem);
+		mru->free_func(elem);
 	}
 
 	spin_lock(&mru->lock);
@@ -277,7 +276,8 @@ STATIC void
 _xfs_mru_cache_reap(
 	struct work_struct	*work)
 {
-	xfs_mru_cache_t		*mru = container_of(work, xfs_mru_cache_t, work.work);
+	struct xfs_mru_cache	*mru =
+		container_of(work, struct xfs_mru_cache, work.work);
 	unsigned long		now, next;
 
 	ASSERT(mru && mru->lists);
@@ -304,28 +304,16 @@ _xfs_mru_cache_reap(
 int
 xfs_mru_cache_init(void)
 {
-	xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t),
-	                                 "xfs_mru_cache_elem");
-	if (!xfs_mru_elem_zone)
-		goto out;
-
 	xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1);
 	if (!xfs_mru_reap_wq)
-		goto out_destroy_mru_elem_zone;
-
+		return -ENOMEM;
 	return 0;
-
- out_destroy_mru_elem_zone:
-	kmem_zone_destroy(xfs_mru_elem_zone);
- out:
-	return -ENOMEM;
 }
 
 void
 xfs_mru_cache_uninit(void)
 {
 	destroy_workqueue(xfs_mru_reap_wq);
-	kmem_zone_destroy(xfs_mru_elem_zone);
 }
 
 /*
@@ -336,14 +324,14 @@ xfs_mru_cache_uninit(void)
  */
 int
 xfs_mru_cache_create(
-	xfs_mru_cache_t		**mrup,
+	struct xfs_mru_cache	**mrup,
 	unsigned int		lifetime_ms,
 	unsigned int		grp_count,
 	xfs_mru_cache_free_func_t free_func)
 {
-	xfs_mru_cache_t	*mru = NULL;
-	int		err = 0, grp;
-	unsigned int	grp_time;
+	struct xfs_mru_cache	*mru = NULL;
+	int			err = 0, grp;
+	unsigned int		grp_time;
 
 	if (mrup)
 		*mrup = NULL;
@@ -400,7 +388,7 @@ exit:
  */
 static void
 xfs_mru_cache_flush(
-	xfs_mru_cache_t		*mru)
+	struct xfs_mru_cache	*mru)
 {
 	if (!mru || !mru->lists)
 		return;
@@ -420,7 +408,7 @@ xfs_mru_cache_flush(
 
 void
 xfs_mru_cache_destroy(
-	xfs_mru_cache_t		*mru)
+	struct xfs_mru_cache	*mru)
 {
 	if (!mru || !mru->lists)
 		return;
@@ -438,38 +426,30 @@ xfs_mru_cache_destroy(
  */
 int
 xfs_mru_cache_insert(
-	xfs_mru_cache_t	*mru,
-	unsigned long	key,
-	void		*value)
+	struct xfs_mru_cache	*mru,
+	unsigned long		key,
+	struct xfs_mru_cache_elem *elem)
 {
-	xfs_mru_cache_elem_t *elem;
+	int			error;
 
 	ASSERT(mru && mru->lists);
 	if (!mru || !mru->lists)
 		return EINVAL;
 
-	elem = kmem_zone_zalloc(xfs_mru_elem_zone, KM_SLEEP);
-	if (!elem)
+	if (radix_tree_preload(GFP_KERNEL))
 		return ENOMEM;
 
-	if (radix_tree_preload(GFP_KERNEL)) {
-		kmem_zone_free(xfs_mru_elem_zone, elem);
-		return ENOMEM;
-	}
-
 	INIT_LIST_HEAD(&elem->list_node);
 	elem->key = key;
-	elem->value = value;
 
 	spin_lock(&mru->lock);
-
-	radix_tree_insert(&mru->store, key, elem);
+	error = -radix_tree_insert(&mru->store, key, elem);
 	radix_tree_preload_end();
-	_xfs_mru_cache_list_insert(mru, elem);
-
+	if (!error)
+		_xfs_mru_cache_list_insert(mru, elem);
 	spin_unlock(&mru->lock);
 
-	return 0;
+	return error;
 }
 
 /*
@@ -478,13 +458,12 @@ xfs_mru_cache_insert(
  * the client data pointer for the removed element is returned, otherwise this
  * function will return a NULL pointer.
  */
-void *
+struct xfs_mru_cache_elem *
 xfs_mru_cache_remove(
-	xfs_mru_cache_t	*mru,
-	unsigned long	key)
+	struct xfs_mru_cache	*mru,
+	unsigned long		key)
 {
-	xfs_mru_cache_elem_t *elem;
-	void		*value = NULL;
+	struct xfs_mru_cache_elem *elem;
 
 	ASSERT(mru && mru->lists);
 	if (!mru || !mru->lists)
@@ -492,17 +471,11 @@ xfs_mru_cache_remove(
 
 	spin_lock(&mru->lock);
 	elem = radix_tree_delete(&mru->store, key);
-	if (elem) {
-		value = elem->value;
+	if (elem)
 		list_del(&elem->list_node);
-	}
-
 	spin_unlock(&mru->lock);
 
-	if (elem)
-		kmem_zone_free(xfs_mru_elem_zone, elem);
-
-	return value;
+	return elem;
 }
 
 /*
@@ -511,13 +484,14 @@ xfs_mru_cache_remove(
  */
 void
 xfs_mru_cache_delete(
-	xfs_mru_cache_t	*mru,
-	unsigned long	key)
+	struct xfs_mru_cache	*mru,
+	unsigned long		key)
 {
-	void		*value = xfs_mru_cache_remove(mru, key);
+	struct xfs_mru_cache_elem *elem;
 
-	if (value)
-		mru->free_func(key, value);
+	elem = xfs_mru_cache_remove(mru, key);
+	if (elem)
+		mru->free_func(elem);
 }
 
 /*
@@ -540,12 +514,12 @@ xfs_mru_cache_delete(
  * status, we need to help it get it right by annotating the path that does
  * not release the lock.
  */
-void *
+struct xfs_mru_cache_elem *
 xfs_mru_cache_lookup(
-	xfs_mru_cache_t	*mru,
-	unsigned long	key)
+	struct xfs_mru_cache	*mru,
+	unsigned long		key)
 {
-	xfs_mru_cache_elem_t *elem;
+	struct xfs_mru_cache_elem *elem;
 
 	ASSERT(mru && mru->lists);
 	if (!mru || !mru->lists)
@@ -560,7 +534,7 @@ xfs_mru_cache_lookup(
 	} else
 		spin_unlock(&mru->lock);
 
-	return elem ? elem->value : NULL;
+	return elem;
 }
 
 /*
@@ -570,7 +544,8 @@ xfs_mru_cache_lookup(
  */
 void
 xfs_mru_cache_done(
-	xfs_mru_cache_t	*mru) __releases(mru->lock)
+	struct xfs_mru_cache	*mru)
+		__releases(mru->lock)
 {
 	spin_unlock(&mru->lock);
 }
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
index 36dd3ec..fb5245b 100644
--- a/fs/xfs/xfs_mru_cache.h
+++ b/fs/xfs/xfs_mru_cache.h
@@ -18,24 +18,15 @@
 #ifndef __XFS_MRU_CACHE_H__
 #define __XFS_MRU_CACHE_H__
 
+struct xfs_mru_cache;
 
-/* Function pointer type for callback to free a client's data pointer. */
-typedef void (*xfs_mru_cache_free_func_t)(unsigned long, void*);
+struct xfs_mru_cache_elem {
+	struct list_head list_node;
+	unsigned long	key;
+};
 
-typedef struct xfs_mru_cache
-{
-	struct radix_tree_root	store;     /* Core storage data structure.  */
-	struct list_head	*lists;    /* Array of lists, one per grp.  */
-	struct list_head	reap_list; /* Elements overdue for reaping. */
-	spinlock_t		lock;      /* Lock to protect this struct.  */
-	unsigned int		grp_count; /* Number of discrete groups.    */
-	unsigned int		grp_time;  /* Time period spanned by grps.  */
-	unsigned int		lru_grp;   /* Group containing time zero.   */
-	unsigned long		time_zero; /* Time first element was added. */
-	xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
-	struct delayed_work	work;      /* Workqueue data for reaping.   */
-	unsigned int		queued;	   /* work has been queued */
-} xfs_mru_cache_t;
+/* Function pointer type for callback to free a client's data pointer. */
+typedef void (*xfs_mru_cache_free_func_t)(struct xfs_mru_cache_elem *elem);
 
 int xfs_mru_cache_init(void);
 void xfs_mru_cache_uninit(void);
@@ -44,10 +35,12 @@ int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
 			     xfs_mru_cache_free_func_t free_func);
 void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
 int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
-				void *value);
-void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key);
+		struct xfs_mru_cache_elem *elem);
+struct xfs_mru_cache_elem *
+xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key);
 void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key);
-void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key);
+struct xfs_mru_cache_elem *
+xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key);
 void xfs_mru_cache_done(struct xfs_mru_cache *mru);
 
 #endif /* __XFS_MRU_CACHE_H__ */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 348e4d2..061fad7 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -843,22 +843,17 @@ xfs_qm_init_quotainfo(
 
 	qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
 
-	if ((error = list_lru_init(&qinf->qi_lru))) {
-		kmem_free(qinf);
-		mp->m_quotainfo = NULL;
-		return error;
-	}
+	error = -list_lru_init(&qinf->qi_lru);
+	if (error)
+		goto out_free_qinf;
 
 	/*
 	 * See if quotainodes are setup, and if not, allocate them,
 	 * and change the superblock accordingly.
 	 */
-	if ((error = xfs_qm_init_quotainos(mp))) {
-		list_lru_destroy(&qinf->qi_lru);
-		kmem_free(qinf);
-		mp->m_quotainfo = NULL;
-		return error;
-	}
+	error = xfs_qm_init_quotainos(mp);
+	if (error)
+		goto out_free_lru;
 
 	INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS);
 	INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS);
@@ -870,8 +865,7 @@ xfs_qm_init_quotainfo(
 
 	/* Precalc some constants */
 	qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
-	qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(mp,
-							qinf->qi_dqchunklen);
+	qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(qinf->qi_dqchunklen);
 
 	mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
 
@@ -918,7 +912,7 @@ xfs_qm_init_quotainfo(
 		qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
 		qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
 		qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
- 
+
 		xfs_qm_dqdestroy(dqp);
 	} else {
 		qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
@@ -935,6 +929,13 @@ xfs_qm_init_quotainfo(
 	qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;
 	register_shrinker(&qinf->qi_shrinker);
 	return 0;
+
+out_free_lru:
+	list_lru_destroy(&qinf->qi_lru);
+out_free_qinf:
+	kmem_free(qinf);
+	mp->m_quotainfo = NULL;
+	return error;
 }
 
 
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 3daf5ea..72cbe65 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -959,7 +959,6 @@ xfs_qm_export_flags(
 STATIC int
 xfs_dqrele_inode(
 	struct xfs_inode	*ip,
-	struct xfs_perag	*pag,
 	int			flags,
 	void			*args)
 {
diff --git a/fs/xfs/xfs_quota_defs.h b/fs/xfs/xfs_quota_defs.h
index b3b2b10..137e209 100644
--- a/fs/xfs/xfs_quota_defs.h
+++ b/fs/xfs/xfs_quota_defs.h
@@ -156,6 +156,6 @@ typedef __uint16_t	xfs_qwarncnt_t;
 
 extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq,
 		       xfs_dqid_t id, uint type, uint flags, char *str);
-extern int xfs_calc_dquots_per_chunk(struct xfs_mount *mp, unsigned int nbblks);
+extern int xfs_calc_dquots_per_chunk(unsigned int nbblks);
 
 #endif	/* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c
index 0c0e41b..8baf61a 100644
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/xfs_sb.c
@@ -201,10 +201,6 @@ xfs_mount_validate_sb(
 	 * write validation, we don't need to check feature masks.
 	 */
 	if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
-		xfs_alert(mp,
-"Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!\n"
-"Use of these features in this kernel is at your own risk!");
-
 		if (xfs_sb_has_compat_feature(sbp,
 					XFS_SB_FEAT_COMPAT_UNKNOWN)) {
 			xfs_warn(mp,
diff --git a/fs/xfs/xfs_shared.h b/fs/xfs/xfs_shared.h
index 4484e51..82404da 100644
--- a/fs/xfs/xfs_shared.h
+++ b/fs/xfs/xfs_shared.h
@@ -238,7 +238,7 @@ int	xfs_log_calc_minimum_size(struct xfs_mount *);
 int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen);
 int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset,
 			uint32_t size, struct xfs_buf *bp);
-bool xfs_symlink_hdr_ok(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset,
+bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset,
 			uint32_t size, struct xfs_buf *bp);
 void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
 				 struct xfs_inode *ip, struct xfs_ifork *ifp);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 2053767..8f0333b 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -765,20 +765,18 @@ xfs_open_devices(
 	 * Setup xfs_mount buffer target pointers
 	 */
 	error = ENOMEM;
-	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
+	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev);
 	if (!mp->m_ddev_targp)
 		goto out_close_rtdev;
 
 	if (rtdev) {
-		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
-							mp->m_fsname);
+		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev);
 		if (!mp->m_rtdev_targp)
 			goto out_free_ddev_targ;
 	}
 
 	if (logdev && logdev != ddev) {
-		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
-							mp->m_fsname);
+		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev);
 		if (!mp->m_logdev_targp)
 			goto out_free_rtdev_targ;
 	} else {
@@ -811,8 +809,7 @@ xfs_setup_devices(
 {
 	int			error;
 
-	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
-				    mp->m_sb.sb_sectsize);
+	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
 	if (error)
 		return error;
 
@@ -822,14 +819,12 @@ xfs_setup_devices(
 		if (xfs_sb_version_hassector(&mp->m_sb))
 			log_sector_size = mp->m_sb.sb_logsectsize;
 		error = xfs_setsize_buftarg(mp->m_logdev_targp,
-					    mp->m_sb.sb_blocksize,
 					    log_sector_size);
 		if (error)
 			return error;
 	}
 	if (mp->m_rtdev_targp) {
 		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
-					    mp->m_sb.sb_blocksize,
 					    mp->m_sb.sb_sectsize);
 		if (error)
 			return error;
@@ -1433,11 +1428,11 @@ xfs_fs_fill_super(
 	if (error)
 		goto out_free_fsname;
 
-	error = xfs_init_mount_workqueues(mp);
+	error = -xfs_init_mount_workqueues(mp);
 	if (error)
 		goto out_close_devices;
 
-	error = xfs_icsb_init_counters(mp);
+	error = -xfs_icsb_init_counters(mp);
 	if (error)
 		goto out_destroy_workqueues;
 
@@ -1754,13 +1749,9 @@ init_xfs_fs(void)
 	if (error)
 		goto out_destroy_wq;
 
-	error = xfs_filestream_init();
-	if (error)
-		goto out_mru_cache_uninit;
-
 	error = xfs_buf_init();
 	if (error)
-		goto out_filestream_uninit;
+		goto out_mru_cache_uninit;
 
 	error = xfs_init_procfs();
 	if (error)
@@ -1787,8 +1778,6 @@ init_xfs_fs(void)
 	xfs_cleanup_procfs();
  out_buf_terminate:
 	xfs_buf_terminate();
- out_filestream_uninit:
-	xfs_filestream_uninit();
  out_mru_cache_uninit:
 	xfs_mru_cache_uninit();
  out_destroy_wq:
@@ -1807,7 +1796,6 @@ exit_xfs_fs(void)
 	xfs_sysctl_unregister();
 	xfs_cleanup_procfs();
 	xfs_buf_terminate();
-	xfs_filestream_uninit();
 	xfs_mru_cache_uninit();
 	xfs_destroy_workqueues();
 	xfs_destroy_zones();
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 52979aa..0816b40 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -92,7 +92,7 @@ xfs_readlink_bmap(
 
 		cur_chunk = bp->b_addr;
 		if (xfs_sb_version_hascrc(&mp->m_sb)) {
-			if (!xfs_symlink_hdr_ok(mp, ip->i_ino, offset,
+			if (!xfs_symlink_hdr_ok(ip->i_ino, offset,
 							byte_cnt, bp)) {
 				error = EFSCORRUPTED;
 				xfs_alert(mp,
diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/xfs_symlink_remote.c
index 9b32052..23c2f25 100644
--- a/fs/xfs/xfs_symlink_remote.c
+++ b/fs/xfs/xfs_symlink_remote.c
@@ -80,7 +80,6 @@ xfs_symlink_hdr_set(
  */
 bool
 xfs_symlink_hdr_ok(
-	struct xfs_mount	*mp,
 	xfs_ino_t		ino,
 	uint32_t		offset,
 	uint32_t		size,
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index dee3279..1e85bcd 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -46,6 +46,7 @@
 #include "xfs_log_recover.h"
 #include "xfs_inode_item.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_filestream.h"
 
 /*
  * We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index a4ae41c..6910458 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -538,6 +538,64 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
 DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
 DEFINE_BUF_ITEM_EVENT(xfs_trans_buf_ordered);
 
+DECLARE_EVENT_CLASS(xfs_filestream_class,
+	TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno),
+	TP_ARGS(ip, agno),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(xfs_agnumber_t, agno)
+		__field(int, streams)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->agno = agno;
+		__entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno);
+	),
+	TP_printk("dev %d:%d ino 0x%llx agno %u streams %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->agno,
+		  __entry->streams)
+)
+#define DEFINE_FILESTREAM_EVENT(name) \
+DEFINE_EVENT(xfs_filestream_class, name, \
+	TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), \
+	TP_ARGS(ip, agno))
+DEFINE_FILESTREAM_EVENT(xfs_filestream_free);
+DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup);
+DEFINE_FILESTREAM_EVENT(xfs_filestream_scan);
+
+TRACE_EVENT(xfs_filestream_pick,
+	TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno,
+		 xfs_extlen_t free, int nscan),
+	TP_ARGS(ip, agno, free, nscan),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(xfs_agnumber_t, agno)
+		__field(int, streams)
+		__field(xfs_extlen_t, free)
+		__field(int, nscan)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->agno = agno;
+		__entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno);
+		__entry->free = free;
+		__entry->nscan = nscan;
+	),
+	TP_printk("dev %d:%d ino 0x%llx agno %u streams %d free %d nscan %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->agno,
+		  __entry->streams,
+		  __entry->free,
+		  __entry->nscan)
+);
+
 DECLARE_EVENT_CLASS(xfs_lock_class,
 	TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
 		 unsigned long caller_ip),
@@ -603,6 +661,7 @@ DEFINE_INODE_EVENT(xfs_readlink);
 DEFINE_INODE_EVENT(xfs_inactive_symlink);
 DEFINE_INODE_EVENT(xfs_alloc_file_space);
 DEFINE_INODE_EVENT(xfs_free_file_space);
+DEFINE_INODE_EVENT(xfs_zero_file_space);
 DEFINE_INODE_EVENT(xfs_collapse_file_space);
 DEFINE_INODE_EVENT(xfs_readdir);
 #ifdef CONFIG_XFS_POSIX_ACL
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 54a5732..d039325 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -827,7 +827,7 @@ xfs_trans_committed_bulk(
 		xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
 
 	spin_lock(&ailp->xa_lock);
-	xfs_trans_ail_cursor_done(ailp, &cur);
+	xfs_trans_ail_cursor_done(&cur);
 	spin_unlock(&ailp->xa_lock);
 }
 
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index a728735..cb0f3a8 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -173,7 +173,6 @@ xfs_trans_ail_cursor_next(
  */
 void
 xfs_trans_ail_cursor_done(
-	struct xfs_ail		*ailp,
 	struct xfs_ail_cursor	*cur)
 {
 	cur->item = NULL;
@@ -368,7 +367,7 @@ xfsaild_push(
 		 * If the AIL is empty or our push has reached the end we are
 		 * done now.
 		 */
-		xfs_trans_ail_cursor_done(ailp, &cur);
+		xfs_trans_ail_cursor_done(&cur);
 		spin_unlock(&ailp->xa_lock);
 		goto out_done;
 	}
@@ -453,7 +452,7 @@ xfsaild_push(
 			break;
 		lsn = lip->li_lsn;
 	}
-	xfs_trans_ail_cursor_done(ailp, &cur);
+	xfs_trans_ail_cursor_done(&cur);
 	spin_unlock(&ailp->xa_lock);
 
 	if (xfs_buf_delwri_submit_nowait(&ailp->xa_buf_list))
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 12e86af..bd12818 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -133,8 +133,7 @@ struct xfs_log_item *	xfs_trans_ail_cursor_last(struct xfs_ail *ailp,
 					xfs_lsn_t lsn);
 struct xfs_log_item *	xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
 					struct xfs_ail_cursor *cur);
-void			xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
-					struct xfs_ail_cursor *cur);
+void			xfs_trans_ail_cursor_done(struct xfs_ail_cursor *cur);
 
 #if BITS_PER_LONG != 64
 static inline void