From 7d0fa3ecba2f12ceef93fffe615e5dd9b50bb794 Mon Sep 17 00:00:00 2001
From: Alain Renaud <arenaud@sgi.com>
Date: Fri, 8 Jun 2012 15:34:46 -0400
Subject: xfs: xfs_vm_writepage clear iomap_valid when !buffer_uptodate (REV2)

On filesystems with a block size smaller than PAGE_SIZE we currently have
a problem with unwritten extents.  If we have a multi-block page for
which an unwritten extent has been allocated, and only some of the
buffers have been written to, and they are not contiguous, we can expose
stale data from disk in the blocks between the writes after extent
conversion.

Example of a page with unwritten and real data.
buffer  content
0       empty  b_state = 0
1       DATA   b_state = 0x1023 Uptodate,Dirty,Mapped,Unwritten
2       DATA   b_state = 0x1023 Uptodate,Dirty,Mapped,Unwritten
3       empty  b_state = 0
4       empty  b_state = 0
5       DATA   b_state = 0x1023 Uptodate,Dirty,Mapped,Unwritten
6       DATA   b_state = 0x1023 Uptodate,Dirty,Mapped,Unwritten
7       empty  b_state = 0

Buffers 1, 2, 5, and 6 have been written to, leaving 0, 3, 4, and 7
empty.  Currently buffers 1, 2, 5, and 6 are added to a single ioend,
and when IO has completed, extent conversion creates a real extent from
block 1 through block 6, leaving 0 and 7 unwritten.  However buffers 3
and 4 were not written to disk, so stale data is exposed from those
blocks on a subsequent read.

Fix this by setting iomap_valid = 0 when we find a buffer that is not
Uptodate.  This ensures that buffers 5 and 6 are not added to the same
ioend as buffers 1 and 2.  Later these blocks will be converted into two
separate real extents, leaving the blocks in between unwritten.

Signed-off-by: Alain Renaud <arenaud@sgi.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index ae31c31..8dad722 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -981,10 +981,15 @@ xfs_vm_writepage(
 				imap_valid = 0;
 			}
 		} else {
-			if (PageUptodate(page)) {
+			if (PageUptodate(page))
 				ASSERT(buffer_mapped(bh));
-				imap_valid = 0;
-			}
+			/*
+			 * This buffer is not uptodate and will not be
+			 * written to disk.  Ensure that we will put any
+			 * subsequent writeable buffers into a new
+			 * ioend.
+			 */
+			imap_valid = 0;
 			continue;
 		}
 
-- 
cgit v0.10.2


From 0f2cf9d3d917b269645902506adaa4ff92b5e506 Mon Sep 17 00:00:00 2001
From: Jeff Liu <jeff.liu@oracle.com>
Date: Thu, 7 Jun 2012 15:44:32 +0800
Subject: xfs: fix debug_object WARN at xfs_alloc_vextent()

Fengguang reports:

[  780.529603] XFS (vdd): Ending clean mount
[  781.454590] ODEBUG: object is on stack, but not annotated
[  781.455433] ------------[ cut here ]------------
[  781.455433] WARNING: at /c/kernel-tests/sound/lib/debugobjects.c:301 __debug_object_init+0x173/0x1f1()
[  781.455433] Hardware name: Bochs
[  781.455433] Modules linked in:
[  781.455433] Pid: 26910, comm: kworker/0:2 Not tainted 3.4.0+ #51
[  781.455433] Call Trace:
[  781.455433]  [<ffffffff8106bc84>] warn_slowpath_common+0x83/0x9b
[  781.455433]  [<ffffffff8106bcb6>] warn_slowpath_null+0x1a/0x1c
[  781.455433]  [<ffffffff814919a5>] __debug_object_init+0x173/0x1f1
[  781.455433]  [<ffffffff81491c65>] debug_object_init+0x14/0x16
[  781.455433]  [<ffffffff8108842a>] __init_work+0x20/0x22
[  781.455433]  [<ffffffff8134ea56>] xfs_alloc_vextent+0x6c/0xd5

Use INIT_WORK_ONSTACK in xfs_alloc_vextent instead of INIT_WORK.

Reported-by: Wu Fengguang <wfg@linux.intel.com>
Signed-off-by: Jie Liu <jeff.liu@oracle.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 229641f..a996e39 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2441,7 +2441,7 @@ xfs_alloc_vextent(
 	DECLARE_COMPLETION_ONSTACK(done);
 
 	args->done = &done;
-	INIT_WORK(&args->work, xfs_alloc_vextent_worker);
+	INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
 	queue_work(xfs_alloc_wq, &args->work);
 	wait_for_completion(&done);
 	return args->result;
-- 
cgit v0.10.2


From d2c2819117176e139dc761873c664aaa770c79c9 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Fri, 8 Jun 2012 15:44:53 +1000
Subject: xfs: m_maxioffset is redundant

The m_maxioffset field in the struct xfs_mount contains the same
value as the superblock s_maxbytes field. There is no need to carry
two copies of this limit around, so use the VFS superblock version.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 8dad722..84e3725 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -323,10 +323,10 @@ xfs_map_blocks(
 
 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
 	       (ip->i_df.if_flags & XFS_IFEXTENTS));
-	ASSERT(offset <= mp->m_maxioffset);
+	ASSERT(offset <= mp->m_super->s_maxbytes);
 
-	if (offset + count > mp->m_maxioffset)
-		count = mp->m_maxioffset - offset;
+	if (offset + count > mp->m_super->s_maxbytes)
+		count = mp->m_super->s_maxbytes - offset;
 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
@@ -1162,9 +1162,9 @@ __xfs_get_blocks(
 		lockmode = xfs_ilock_map_shared(ip);
 	}
 
-	ASSERT(offset <= mp->m_maxioffset);
-	if (offset + size > mp->m_maxioffset)
-		size = mp->m_maxioffset - offset;
+	ASSERT(offset <= mp->m_super->s_maxbytes);
+	if (offset + size > mp->m_super->s_maxbytes)
+		size = mp->m_super->s_maxbytes - offset;
 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index aadfce6..4590cd1 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -416,8 +416,8 @@ retry:
 	 * Make sure preallocation does not create extents beyond the range we
 	 * actually support in this filesystem.
 	 */
-	if (last_fsb > XFS_B_TO_FSB(mp, mp->m_maxioffset))
-		last_fsb = XFS_B_TO_FSB(mp, mp->m_maxioffset);
+	if (last_fsb > XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes))
+		last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
 
 	ASSERT(last_fsb > offset_fsb);
 
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 536021f..9536fd1 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1200,8 +1200,6 @@ xfs_mountfs(
 
 	xfs_set_maxicount(mp);
 
-	mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog);
-
 	error = xfs_uuid_mount(mp);
 	if (error)
 		goto out;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 8b89c5a..47c6b3b 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -176,7 +176,6 @@ typedef struct xfs_mount {
 	uint			m_qflags;	/* quota status flags */
 	xfs_trans_reservations_t m_reservations;/* precomputed res values */
 	__uint64_t		m_maxicount;	/* maximum inode count */
-	__uint64_t		m_maxioffset;	/* maximum inode offset */
 	__uint64_t		m_resblks;	/* total reserved blocks */
 	__uint64_t		m_resblks_avail;/* available reserved blocks */
 	__uint64_t		m_resblks_save;	/* reserved blks @ remount,ro */
@@ -297,7 +296,7 @@ xfs_preferred_iosize(xfs_mount_t *mp)
 			PAGE_CACHE_SIZE));
 }
 
-#define XFS_MAXIOFFSET(mp)	((mp)->m_maxioffset)
+#define XFS_MAXIOFFSET(mp)	((mp)->m_super->s_maxbytes)
 
 #define XFS_LAST_UNMOUNT_WAS_CLEAN(mp)	\
 				((mp)->m_flags & XFS_MOUNT_WAS_CLEAN)
-- 
cgit v0.10.2


From 32972383ca46223aa2b129826b3789721ec147aa Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Fri, 8 Jun 2012 15:44:54 +1000
Subject: xfs: make largest supported offset less shouty

XFS_MAXIOFFSET() is just a simple macro that resolves to
mp->m_super->s_maxbytes. It doesn't need to exist, and it just makes the
code unnecessarily loud and shouty.

Make it quiet and easy to read.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 58b815e..848ffa77 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5517,7 +5517,7 @@ xfs_getbmap(
 		if (xfs_get_extsz_hint(ip) ||
 		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
 			prealloced = 1;
-			fixlen = XFS_MAXIOFFSET(mp);
+			fixlen = mp->m_super->s_maxbytes;
 		} else {
 			prealloced = 0;
 			fixlen = XFS_ISIZE(ip);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 9f7ec15..59e22c9 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -273,7 +273,7 @@ xfs_file_aio_read(
 		}
 	}
 
-	n = XFS_MAXIOFFSET(mp) - iocb->ki_pos;
+	n = mp->m_super->s_maxbytes - iocb->ki_pos;
 	if (n <= 0 || size == 0)
 		return 0;
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a59eea0..257f3c4 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1226,7 +1226,7 @@ xfs_itruncate_extents(
 	 * then there is nothing to do.
 	 */
 	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
-	last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+	last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
 	if (first_unmap_block == last_block)
 		return 0;
 
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 4590cd1..915edf6 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -285,7 +285,7 @@ xfs_iomap_eof_want_preallocate(
 	 * do any speculative allocation.
 	 */
 	start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
-	count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+	count_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
 	while (count_fsb > 0) {
 		imaps = nimaps;
 		firstblock = NULLFSBLOCK;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 47c6b3b..90a4530 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -296,8 +296,6 @@ xfs_preferred_iosize(xfs_mount_t *mp)
 			PAGE_CACHE_SIZE));
 }
 
-#define XFS_MAXIOFFSET(mp)	((mp)->m_super->s_maxbytes)
-
 #define XFS_LAST_UNMOUNT_WAS_CLEAN(mp)	\
 				((mp)->m_flags & XFS_MOUNT_WAS_CLEAN)
 #define XFS_FORCED_SHUTDOWN(mp)	((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 249db19..2e86fa0 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -940,7 +940,7 @@ xfs_qm_dqiterate(
 	map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
 
 	lblkno = 0;
-	maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+	maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
 	do {
 		nmaps = XFS_DQITER_MAP_SIZE;
 		/*
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index b6a82d8..c22f4e0 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -174,7 +174,7 @@ xfs_free_eofblocks(
 	 * of the file.  If not, then there is nothing to do.
 	 */
 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
-	last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
 	if (last_fsb <= end_fsb)
 		return 0;
 	map_len = last_fsb - end_fsb;
@@ -2262,10 +2262,10 @@ xfs_change_file_space(
 
 	llen = bf->l_len > 0 ? bf->l_len - 1 : bf->l_len;
 
-	if (   (bf->l_start < 0)
-	    || (bf->l_start > XFS_MAXIOFFSET(mp))
-	    || (bf->l_start + llen < 0)
-	    || (bf->l_start + llen > XFS_MAXIOFFSET(mp)))
+	if (bf->l_start < 0 ||
+	    bf->l_start > mp->m_super->s_maxbytes ||
+	    bf->l_start + llen < 0 ||
+	    bf->l_start + llen > mp->m_super->s_maxbytes)
 		return XFS_ERROR(EINVAL);
 
 	bf->l_whence = 0;
-- 
cgit v0.10.2


From 5276432997feb2366ac1e77949e94fe86a394813 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Fri, 8 Jun 2012 15:45:44 +1000
Subject: xfs: kill copy and paste segment checks in xfs_file_aio_read

The generic segment check code now returns a count of the number of
bytes in the iovec, so we don't need to roll our own anymore.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 59e22c9..c4559c6 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -236,7 +236,6 @@ xfs_file_aio_read(
 	ssize_t			ret = 0;
 	int			ioflags = 0;
 	xfs_fsize_t		n;
-	unsigned long		seg;
 
 	XFS_STATS_INC(xs_read_calls);
 
@@ -247,19 +246,9 @@ xfs_file_aio_read(
 	if (file->f_mode & FMODE_NOCMTIME)
 		ioflags |= IO_INVIS;
 
-	/* START copy & waste from filemap.c */
-	for (seg = 0; seg < nr_segs; seg++) {
-		const struct iovec *iv = &iovp[seg];
-
-		/*
-		 * If any segment has a negative length, or the cumulative
-		 * length ever wraps negative then return -EINVAL.
-		 */
-		size += iv->iov_len;
-		if (unlikely((ssize_t)(size|iv->iov_len) < 0))
-			return XFS_ERROR(-EINVAL);
-	}
-	/* END copy & waste from filemap.c */
+	ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE);
+	if (ret < 0)
+		return ret;
 
 	if (unlikely(ioflags & IO_ISDIRECT)) {
 		xfs_buftarg_t	*target =
-- 
cgit v0.10.2


From 51c84223af604ce2d00d0416c30a38c50aed00bd Mon Sep 17 00:00:00 2001
From: Chen Baozi <baozich@gmail.com>
Date: Sat, 26 May 2012 00:48:47 +0800
Subject: xfs: fix typo in comment of xfs_dinode_t.

There should be "XFS_DFORK_DPTR, XFS_DFORK_APTR, and XFS_DFORK_PTR" instead
of "XFS_DFORK_PTR, XFS_DFORK_DPTR, and XFS_DFORK_PTR".

Signed-off-by: Chen Baozi <baozich@gmail.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index a372163..1d9643b 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -33,7 +33,7 @@ typedef struct xfs_timestamp {
  * variable size the leftover area split into a data and an attribute fork.
  * The format of the data and attribute fork depends on the format of the
  * inode as indicated by di_format and di_aformat.  To access the data and
- * attribute use the XFS_DFORK_PTR, XFS_DFORK_DPTR, and XFS_DFORK_PTR macros
+ * attribute use the XFS_DFORK_DPTR, XFS_DFORK_APTR, and XFS_DFORK_PTR macros
  * below.
  *
  * There is a very similar struct icdinode in xfs_inode which matches the
-- 
cgit v0.10.2


From 76e8f1386673b864cfca3c24c4d5814740e76465 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Mon, 11 Jun 2012 10:39:43 -0400
Subject: xfs: check for stale inode before acquiring iflock on push

An inode in the AIL can be flush locked and marked stale if
a cluster free transaction occurs at the right time. The
inode item is then marked as flushing, which causes xfsaild
to spin and leaves the filesystem stalled. This is
reproduced by running xfstests 273 in a loop for an
extended period of time.

Check for stale inodes before the flush lock. This marks
the inode as pinned, leads to a log flush and allows the
filesystem to proceed.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 6cdbf90c..d041d47 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -505,6 +505,14 @@ xfs_inode_item_push(
 	}
 
 	/*
+	 * Stale inode items should force out the iclog.
+	 */
+	if (ip->i_flags & XFS_ISTALE) {
+		rval = XFS_ITEM_PINNED;
+		goto out_unlock;
+	}
+
+	/*
 	 * Someone else is already flushing the inode.  Nothing we can do
 	 * here but wait for the flush to finish and remove the item from
 	 * the AIL.
@@ -514,15 +522,6 @@ xfs_inode_item_push(
 		goto out_unlock;
 	}
 
-	/*
-	 * Stale inode items should force out the iclog.
-	 */
-	if (ip->i_flags & XFS_ISTALE) {
-		xfs_ifunlock(ip);
-		xfs_iunlock(ip, XFS_ILOCK_SHARED);
-		return XFS_ITEM_PINNED;
-	}
-
 	ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
 	ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
 
-- 
cgit v0.10.2


From 079da28c64ebeca357adae77aea3ae7160e45d98 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 12 Jun 2012 14:20:26 +1000
Subject: xfs: fix allocbt cursor leak in xfs_alloc_ag_vextent_near

When we fail to find a matching extent near the requested extent
specification during a left-right distance search in
xfs_alloc_ag_vextent_near, we fail to free the original cursor that
we used to look up the XFS_BTNUM_CNT tree and hence leak it.

Reported-by: Chris J Arges <chris.j.arges@canonical.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index a996e39..9d1aeb7 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1080,6 +1080,7 @@ restart:
 			goto restart;
 		}
 
+		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 		trace_xfs_alloc_size_neither(args);
 		args->agbno = NULLAGBLOCK;
 		return 0;
-- 
cgit v0.10.2


From bcf62ab64d1ba257dd9d4283a077a7219a05073a Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 6 Jun 2012 00:32:26 +0200
Subject: xfs: Fix overallocation in xfs_buf_allocate_memory()

Commit de1cbee which removed b_file_offset in favor of b_bn introduced a bug
causing xfs_buf_allocate_memory() to overestimate the number of necessary
pages. The problem is that xfs_buf_alloc() sets b_bn to -1 and thus effectively
every buffer is straddling a page boundary which causes
xfs_buf_allocate_memory() to allocate two pages and use vmalloc() for access
which is unnecessary.

Dave says xfs_buf_alloc() doesn't need to set b_bn to -1 anymore since the
buffer is inserted into the cache only after being fully initialized now.
So just make xfs_buf_alloc() fill in proper block number from the beginning.

CC: David Chinner <dchinner@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 172d3cc..a4beb42 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -201,14 +201,7 @@ xfs_buf_alloc(
 	bp->b_length = numblks;
 	bp->b_io_length = numblks;
 	bp->b_flags = flags;
-
-	/*
-	 * We do not set the block number here in the buffer because we have not
-	 * finished initialising the buffer. We insert the buffer into the cache
-	 * in this state, so this ensures that we are unable to do IO on a
-	 * buffer that hasn't been fully initialised.
-	 */
-	bp->b_bn = XFS_BUF_DADDR_NULL;
+	bp->b_bn = blkno;
 	atomic_set(&bp->b_pin_count, 0);
 	init_waitqueue_head(&bp->b_waiters);
 
@@ -567,11 +560,6 @@ xfs_buf_get(
 	if (bp != new_bp)
 		xfs_buf_free(new_bp);
 
-	/*
-	 * Now we have a workable buffer, fill in the block number so
-	 * that we can do IO on it.
-	 */
-	bp->b_bn = blkno;
 	bp->b_io_length = bp->b_length;
 
 found:
@@ -772,7 +760,7 @@ xfs_buf_get_uncached(
 	int			error, i;
 	xfs_buf_t		*bp;
 
-	bp = xfs_buf_alloc(target, 0, numblks, 0);
+	bp = xfs_buf_alloc(target, XFS_BUF_DADDR_NULL, numblks, 0);
 	if (unlikely(bp == NULL))
 		goto fail;
 
-- 
cgit v0.10.2


From 11159a0500c1eb7a8a2de37b7dceb53373c75350 Mon Sep 17 00:00:00 2001
From: Ben Myers <bpm@sgi.com>
Date: Fri, 25 May 2012 15:45:36 -0500
Subject: xfs: shutdown xfs_sync_worker before the log

Revert commit 1307bbd, which uses the s_umount semaphore to provide
exclusion between xfs_sync_worker and unmount, in favor of shutting down
the sync worker before freeing the log in xfs_log_unmount.  This is a
cleaner way of resolving the race between xfs_sync_worker and unmount
than using s_umount.

Signed-off-by: Ben Myers <bpm@sgi.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index f30d980..0e1a64f 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -810,6 +810,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 void
 xfs_log_unmount(xfs_mount_t *mp)
 {
+	cancel_delayed_work_sync(&mp->m_sync_work);
 	xfs_trans_ail_destroy(mp);
 	xlog_dealloc_log(mp->m_log);
 }
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index c9d3409..1e9ee06 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -386,23 +386,23 @@ xfs_sync_worker(
 	 * We shouldn't write/force the log if we are in the mount/unmount
 	 * process or on a read only filesystem. The workqueue still needs to be
 	 * active in both cases, however, because it is used for inode reclaim
-	 * during these times.  Use the s_umount semaphore to provide exclusion
-	 * with unmount.
+	 * during these times.  Use the MS_ACTIVE flag to avoid doing anything
+	 * during mount.  Doing work during unmount is avoided by calling
+	 * cancel_delayed_work_sync on this work queue before tearing down
+	 * the ail and the log in xfs_log_unmount.
 	 */
-	if (down_read_trylock(&mp->m_super->s_umount)) {
-		if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
-			/* dgc: errors ignored here */
-			if (mp->m_super->s_frozen == SB_UNFROZEN &&
-			    xfs_log_need_covered(mp))
-				error = xfs_fs_log_dummy(mp);
-			else
-				xfs_log_force(mp, 0);
-
-			/* start pushing all the metadata that is currently
-			 * dirty */
-			xfs_ail_push_all(mp->m_ail);
-		}
-		up_read(&mp->m_super->s_umount);
+	if (!(mp->m_super->s_flags & MS_ACTIVE) &&
+	    !(mp->m_flags & XFS_MOUNT_RDONLY)) {
+		/* dgc: errors ignored here */
+		if (mp->m_super->s_frozen == SB_UNFROZEN &&
+		    xfs_log_need_covered(mp))
+			error = xfs_fs_log_dummy(mp);
+		else
+			xfs_log_force(mp, 0);
+
+		/* start pushing all the metadata that is currently
+		 * dirty */
+		xfs_ail_push_all(mp->m_ail);
 	}
 
 	/* queue us up again */
-- 
cgit v0.10.2


From ad223e6030be017470e46f153de27a43979759e0 Mon Sep 17 00:00:00 2001
From: Mark Tinguely <tinguely@sgi.com>
Date: Thu, 14 Jun 2012 09:22:15 -0500
Subject: xfs: rename log structure to xlog

Rename the XFS log structure to xlog to help crash distinguish it from the
other logs in Linux.

Signed-off-by: Mark Tinguely <tinguely@sgi.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 0e1a64f..d90d4a3 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -38,13 +38,21 @@
 kmem_zone_t	*xfs_log_ticket_zone;
 
 /* Local miscellaneous function prototypes */
-STATIC int	 xlog_commit_record(struct log *log, struct xlog_ticket *ticket,
-				    xlog_in_core_t **, xfs_lsn_t *);
+STATIC int
+xlog_commit_record(
+	struct xlog		*log,
+	struct xlog_ticket	*ticket,
+	struct xlog_in_core	**iclog,
+	xfs_lsn_t		*commitlsnp);
+
 STATIC xlog_t *  xlog_alloc_log(xfs_mount_t	*mp,
 				xfs_buftarg_t	*log_target,
 				xfs_daddr_t	blk_offset,
 				int		num_bblks);
-STATIC int	 xlog_space_left(struct log *log, atomic64_t *head);
+STATIC int
+xlog_space_left(
+	struct xlog		*log,
+	atomic64_t		*head);
 STATIC int	 xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
 STATIC void	 xlog_dealloc_log(xlog_t *log);
 
@@ -64,8 +72,10 @@ STATIC void xlog_state_switch_iclogs(xlog_t		*log,
 				     int		eventual_size);
 STATIC void xlog_state_want_sync(xlog_t	*log, xlog_in_core_t *iclog);
 
-STATIC void xlog_grant_push_ail(struct log	*log,
-				int		need_bytes);
+STATIC void
+xlog_grant_push_ail(
+	struct xlog	*log,
+	int		need_bytes);
 STATIC void xlog_regrant_reserve_log_space(xlog_t	 *log,
 					   xlog_ticket_t *ticket);
 STATIC void xlog_ungrant_log_space(xlog_t	 *log,
@@ -73,7 +83,9 @@ STATIC void xlog_ungrant_log_space(xlog_t	 *log,
 
 #if defined(DEBUG)
 STATIC void	xlog_verify_dest_ptr(xlog_t *log, char *ptr);
-STATIC void	xlog_verify_grant_tail(struct log *log);
+STATIC void
+xlog_verify_grant_tail(
+	struct xlog	*log);
 STATIC void	xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
 				  int count, boolean_t syncing);
 STATIC void	xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
@@ -89,9 +101,9 @@ STATIC int	xlog_iclogs_empty(xlog_t *log);
 
 static void
 xlog_grant_sub_space(
-	struct log	*log,
-	atomic64_t	*head,
-	int		bytes)
+	struct xlog		*log,
+	atomic64_t		*head,
+	int			bytes)
 {
 	int64_t	head_val = atomic64_read(head);
 	int64_t new, old;
@@ -115,9 +127,9 @@ xlog_grant_sub_space(
 
 static void
 xlog_grant_add_space(
-	struct log	*log,
-	atomic64_t	*head,
-	int		bytes)
+	struct xlog		*log,
+	atomic64_t		*head,
+	int			bytes)
 {
 	int64_t	head_val = atomic64_read(head);
 	int64_t new, old;
@@ -165,7 +177,7 @@ xlog_grant_head_wake_all(
 
 static inline int
 xlog_ticket_reservation(
-	struct log		*log,
+	struct xlog		*log,
 	struct xlog_grant_head	*head,
 	struct xlog_ticket	*tic)
 {
@@ -182,7 +194,7 @@ xlog_ticket_reservation(
 
 STATIC bool
 xlog_grant_head_wake(
-	struct log		*log,
+	struct xlog		*log,
 	struct xlog_grant_head	*head,
 	int			*free_bytes)
 {
@@ -204,7 +216,7 @@ xlog_grant_head_wake(
 
 STATIC int
 xlog_grant_head_wait(
-	struct log		*log,
+	struct xlog		*log,
 	struct xlog_grant_head	*head,
 	struct xlog_ticket	*tic,
 	int			need_bytes)
@@ -256,7 +268,7 @@ shutdown:
  */
 STATIC int
 xlog_grant_head_check(
-	struct log		*log,
+	struct xlog		*log,
 	struct xlog_grant_head	*head,
 	struct xlog_ticket	*tic,
 	int			*need_bytes)
@@ -323,7 +335,7 @@ xfs_log_regrant(
 	struct xfs_mount	*mp,
 	struct xlog_ticket	*tic)
 {
-	struct log		*log = mp->m_log;
+	struct xlog		*log = mp->m_log;
 	int			need_bytes;
 	int			error = 0;
 
@@ -389,7 +401,7 @@ xfs_log_reserve(
 	bool			permanent,
 	uint		 	t_type)
 {
-	struct log		*log = mp->m_log;
+	struct xlog		*log = mp->m_log;
 	struct xlog_ticket	*tic;
 	int			need_bytes;
 	int			error = 0;
@@ -465,7 +477,7 @@ xfs_log_done(
 	struct xlog_in_core	**iclog,
 	uint			flags)
 {
-	struct log		*log = mp->m_log;
+	struct xlog		*log = mp->m_log;
 	xfs_lsn_t		lsn = 0;
 
 	if (XLOG_FORCED_SHUTDOWN(log) ||
@@ -839,7 +851,7 @@ void
 xfs_log_space_wake(
 	struct xfs_mount	*mp)
 {
-	struct log		*log = mp->m_log;
+	struct xlog		*log = mp->m_log;
 	int			free_bytes;
 
 	if (XLOG_FORCED_SHUTDOWN(log))
@@ -917,7 +929,7 @@ xfs_lsn_t
 xlog_assign_tail_lsn_locked(
 	struct xfs_mount	*mp)
 {
-	struct log		*log = mp->m_log;
+	struct xlog		*log = mp->m_log;
 	struct xfs_log_item	*lip;
 	xfs_lsn_t		tail_lsn;
 
@@ -966,7 +978,7 @@ xlog_assign_tail_lsn(
  */
 STATIC int
 xlog_space_left(
-	struct log	*log,
+	struct xlog	*log,
 	atomic64_t	*head)
 {
 	int		free_bytes;
@@ -1278,7 +1290,7 @@ out:
  */
 STATIC int
 xlog_commit_record(
-	struct log		*log,
+	struct xlog		*log,
 	struct xlog_ticket	*ticket,
 	struct xlog_in_core	**iclog,
 	xfs_lsn_t		*commitlsnp)
@@ -1312,7 +1324,7 @@ xlog_commit_record(
  */
 STATIC void
 xlog_grant_push_ail(
-	struct log	*log,
+	struct xlog	*log,
 	int		need_bytes)
 {
 	xfs_lsn_t	threshold_lsn = 0;
@@ -1791,7 +1803,7 @@ xlog_write_start_rec(
 
 static xlog_op_header_t *
 xlog_write_setup_ophdr(
-	struct log		*log,
+	struct xlog		*log,
 	struct xlog_op_header	*ophdr,
 	struct xlog_ticket	*ticket,
 	uint			flags)
@@ -1874,7 +1886,7 @@ xlog_write_setup_copy(
 
 static int
 xlog_write_copy_finish(
-	struct log		*log,
+	struct xlog		*log,
 	struct xlog_in_core	*iclog,
 	uint			flags,
 	int			*record_cnt,
@@ -1959,7 +1971,7 @@ xlog_write_copy_finish(
  */
 int
 xlog_write(
-	struct log		*log,
+	struct xlog		*log,
 	struct xfs_log_vec	*log_vector,
 	struct xlog_ticket	*ticket,
 	xfs_lsn_t		*start_lsn,
@@ -2822,7 +2834,7 @@ _xfs_log_force(
 	uint			flags,
 	int			*log_flushed)
 {
-	struct log		*log = mp->m_log;
+	struct xlog		*log = mp->m_log;
 	struct xlog_in_core	*iclog;
 	xfs_lsn_t		lsn;
 
@@ -2970,7 +2982,7 @@ _xfs_log_force_lsn(
 	uint			flags,
 	int			*log_flushed)
 {
-	struct log		*log = mp->m_log;
+	struct xlog		*log = mp->m_log;
 	struct xlog_in_core	*iclog;
 	int			already_slept = 0;
 
@@ -3148,7 +3160,7 @@ xfs_log_ticket_get(
  */
 xlog_ticket_t *
 xlog_ticket_alloc(
-	struct log	*log,
+	struct xlog	*log,
 	int		unit_bytes,
 	int		cnt,
 	char		client,
@@ -3279,7 +3291,7 @@ xlog_ticket_alloc(
  */
 void
 xlog_verify_dest_ptr(
-	struct log	*log,
+	struct xlog	*log,
 	char		*ptr)
 {
 	int i;
@@ -3308,7 +3320,7 @@ xlog_verify_dest_ptr(
  */
 STATIC void
 xlog_verify_grant_tail(
-	struct log	*log)
+	struct xlog	*log)
 {
 	int		tail_cycle, tail_blocks;
 	int		cycle, space;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 7d6197c..ddc4529 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -44,7 +44,7 @@
  */
 static struct xlog_ticket *
 xlog_cil_ticket_alloc(
-	struct log	*log)
+	struct xlog	*log)
 {
 	struct xlog_ticket *tic;
 
@@ -72,7 +72,7 @@ xlog_cil_ticket_alloc(
  */
 void
 xlog_cil_init_post_recovery(
-	struct log	*log)
+	struct xlog	*log)
 {
 	log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
 	log->l_cilp->xc_ctx->sequence = 1;
@@ -182,7 +182,7 @@ xlog_cil_prepare_log_vecs(
  */
 STATIC void
 xfs_cil_prepare_item(
-	struct log		*log,
+	struct xlog		*log,
 	struct xfs_log_vec	*lv,
 	int			*len,
 	int			*diff_iovecs)
@@ -231,7 +231,7 @@ xfs_cil_prepare_item(
  */
 static void
 xlog_cil_insert_items(
-	struct log		*log,
+	struct xlog		*log,
 	struct xfs_log_vec	*log_vector,
 	struct xlog_ticket	*ticket)
 {
@@ -373,7 +373,7 @@ xlog_cil_committed(
  */
 STATIC int
 xlog_cil_push(
-	struct log		*log)
+	struct xlog		*log)
 {
 	struct xfs_cil		*cil = log->l_cilp;
 	struct xfs_log_vec	*lv;
@@ -601,7 +601,7 @@ xlog_cil_push_work(
  */
 static void
 xlog_cil_push_background(
-	struct log	*log)
+	struct xlog	*log)
 {
 	struct xfs_cil	*cil = log->l_cilp;
 
@@ -629,7 +629,7 @@ xlog_cil_push_background(
 
 static void
 xlog_cil_push_foreground(
-	struct log	*log,
+	struct xlog	*log,
 	xfs_lsn_t	push_seq)
 {
 	struct xfs_cil	*cil = log->l_cilp;
@@ -683,7 +683,7 @@ xfs_log_commit_cil(
 	xfs_lsn_t		*commit_lsn,
 	int			flags)
 {
-	struct log		*log = mp->m_log;
+	struct xlog		*log = mp->m_log;
 	int			log_flags = 0;
 	struct xfs_log_vec	*log_vector;
 
@@ -754,7 +754,7 @@ xfs_log_commit_cil(
  */
 xfs_lsn_t
 xlog_cil_force_lsn(
-	struct log	*log,
+	struct xlog	*log,
 	xfs_lsn_t	sequence)
 {
 	struct xfs_cil		*cil = log->l_cilp;
@@ -833,7 +833,7 @@ xfs_log_item_in_current_chkpt(
  */
 int
 xlog_cil_init(
-	struct log	*log)
+	struct xlog	*log)
 {
 	struct xfs_cil	*cil;
 	struct xfs_cil_ctx *ctx;
@@ -869,7 +869,7 @@ xlog_cil_init(
 
 void
 xlog_cil_destroy(
-	struct log	*log)
+	struct xlog	*log)
 {
 	if (log->l_cilp->xc_ctx) {
 		if (log->l_cilp->xc_ctx->ticket)
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 5bc3326..72eba22 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -19,7 +19,7 @@
 #define __XFS_LOG_PRIV_H__
 
 struct xfs_buf;
-struct log;
+struct xlog;
 struct xlog_ticket;
 struct xfs_mount;
 
@@ -352,7 +352,7 @@ typedef struct xlog_in_core {
 	struct xlog_in_core	*ic_next;
 	struct xlog_in_core	*ic_prev;
 	struct xfs_buf		*ic_bp;
-	struct log		*ic_log;
+	struct xlog		*ic_log;
 	int			ic_size;
 	int			ic_offset;
 	int			ic_bwritecnt;
@@ -409,7 +409,7 @@ struct xfs_cil_ctx {
  * operations almost as efficient as the old logging methods.
  */
 struct xfs_cil {
-	struct log		*xc_log;
+	struct xlog		*xc_log;
 	struct list_head	xc_cil;
 	spinlock_t		xc_cil_lock;
 	struct xfs_cil_ctx	*xc_ctx;
@@ -487,7 +487,7 @@ struct xlog_grant_head {
  * overflow 31 bits worth of byte offset, so using a byte number will mean
  * that round off problems won't occur when releasing partial reservations.
  */
-typedef struct log {
+typedef struct xlog {
 	/* The following fields don't need locking */
 	struct xfs_mount	*l_mp;	        /* mount point */
 	struct xfs_ail		*l_ailp;	/* AIL log is working with */
@@ -553,9 +553,14 @@ extern int	 xlog_recover_finish(xlog_t *log);
 extern void	 xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
 
 extern kmem_zone_t *xfs_log_ticket_zone;
-struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes,
-				int count, char client, bool permanent,
-				xfs_km_flags_t alloc_flags);
+struct xlog_ticket *
+xlog_ticket_alloc(
+	struct xlog	*log,
+	int		unit_bytes,
+	int		count,
+	char		client,
+	bool		permanent,
+	xfs_km_flags_t	alloc_flags);
 
 
 static inline void
@@ -567,9 +572,14 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
 }
 
 void	xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
-int	xlog_write(struct log *log, struct xfs_log_vec *log_vector,
-				struct xlog_ticket *tic, xfs_lsn_t *start_lsn,
-				xlog_in_core_t **commit_iclog, uint flags);
+int
+xlog_write(
+	struct xlog		*log,
+	struct xfs_log_vec	*log_vector,
+	struct xlog_ticket	*tic,
+	xfs_lsn_t		*start_lsn,
+	struct xlog_in_core	**commit_iclog,
+	uint			flags);
 
 /*
  * When we crack an atomic LSN, we sample it first so that the value will not
@@ -629,17 +639,23 @@ xlog_assign_grant_head(atomic64_t *head, int cycle, int space)
 /*
  * Committed Item List interfaces
  */
-int	xlog_cil_init(struct log *log);
-void	xlog_cil_init_post_recovery(struct log *log);
-void	xlog_cil_destroy(struct log *log);
+int
+xlog_cil_init(struct xlog *log);
+void
+xlog_cil_init_post_recovery(struct xlog *log);
+void
+xlog_cil_destroy(struct xlog *log);
 
 /*
  * CIL force routines
  */
-xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence);
+xfs_lsn_t
+xlog_cil_force_lsn(
+	struct xlog *log,
+	xfs_lsn_t sequence);
 
 static inline void
-xlog_cil_force(struct log *log)
+xlog_cil_force(struct xlog *log)
 {
 	xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
 }
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index ca38690..a7be98a 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1471,8 +1471,8 @@ xlog_recover_add_item(
 
 STATIC int
 xlog_recover_add_to_cont_trans(
-	struct log		*log,
-	xlog_recover_t		*trans,
+	struct xlog		*log,
+	struct xlog_recover	*trans,
 	xfs_caddr_t		dp,
 	int			len)
 {
@@ -1517,8 +1517,8 @@ xlog_recover_add_to_cont_trans(
  */
 STATIC int
 xlog_recover_add_to_trans(
-	struct log		*log,
-	xlog_recover_t		*trans,
+	struct xlog		*log,
+	struct xlog_recover	*trans,
 	xfs_caddr_t		dp,
 	int			len)
 {
@@ -1588,8 +1588,8 @@ xlog_recover_add_to_trans(
  */
 STATIC int
 xlog_recover_reorder_trans(
-	struct log		*log,
-	xlog_recover_t		*trans,
+	struct xlog		*log,
+	struct xlog_recover	*trans,
 	int			pass)
 {
 	xlog_recover_item_t	*item, *n;
@@ -1642,8 +1642,8 @@ xlog_recover_reorder_trans(
  */
 STATIC int
 xlog_recover_buffer_pass1(
-	struct log		*log,
-	xlog_recover_item_t	*item)
+	struct xlog			*log,
+	struct xlog_recover_item	*item)
 {
 	xfs_buf_log_format_t	*buf_f = item->ri_buf[0].i_addr;
 	struct list_head	*bucket;
@@ -1696,7 +1696,7 @@ xlog_recover_buffer_pass1(
  */
 STATIC int
 xlog_check_buffer_cancelled(
-	struct log		*log,
+	struct xlog		*log,
 	xfs_daddr_t		blkno,
 	uint			len,
 	ushort			flags)
@@ -2689,9 +2689,9 @@ xlog_recover_free_trans(
 
 STATIC int
 xlog_recover_commit_pass1(
-	struct log		*log,
-	struct xlog_recover	*trans,
-	xlog_recover_item_t	*item)
+	struct xlog			*log,
+	struct xlog_recover		*trans,
+	struct xlog_recover_item	*item)
 {
 	trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1);
 
@@ -2716,10 +2716,10 @@ xlog_recover_commit_pass1(
 
 STATIC int
 xlog_recover_commit_pass2(
-	struct log		*log,
-	struct xlog_recover	*trans,
-	struct list_head	*buffer_list,
-	xlog_recover_item_t	*item)
+	struct xlog			*log,
+	struct xlog_recover		*trans,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item)
 {
 	trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
 
@@ -2753,7 +2753,7 @@ xlog_recover_commit_pass2(
  */
 STATIC int
 xlog_recover_commit_trans(
-	struct log		*log,
+	struct xlog		*log,
 	struct xlog_recover	*trans,
 	int			pass)
 {
@@ -2793,8 +2793,8 @@ out:
 
 STATIC int
 xlog_recover_unmount_trans(
-	struct log		*log,
-	xlog_recover_t		*trans)
+	struct xlog		*log,
+	struct xlog_recover	*trans)
 {
 	/* Do nothing now */
 	xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 90a4530..8724336 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -53,7 +53,7 @@ typedef struct xfs_trans_reservations {
 
 #include "xfs_sync.h"
 
-struct log;
+struct xlog;
 struct xfs_mount_args;
 struct xfs_inode;
 struct xfs_bmbt_irec;
@@ -133,7 +133,7 @@ typedef struct xfs_mount {
 	uint			m_readio_blocks; /* min read size blocks */
 	uint			m_writeio_log;	/* min write size log bytes */
 	uint			m_writeio_blocks; /* min write size blocks */
-	struct log		*m_log;		/* log specific stuff */
+	struct xlog		*m_log;		/* log specific stuff */
 	int			m_logbufs;	/* number of log buffers */
 	int			m_logbsize;	/* size of each log buffer */
 	uint			m_rsumlevels;	/* rt summary levels */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 7cf9d35..caf5dab 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -32,7 +32,7 @@ struct xfs_da_node_entry;
 struct xfs_dquot;
 struct xfs_log_item;
 struct xlog_ticket;
-struct log;
+struct xlog;
 struct xlog_recover;
 struct xlog_recover_item;
 struct xfs_buf_log_format;
@@ -762,7 +762,7 @@ DEFINE_DQUOT_EVENT(xfs_dqflush_force);
 DEFINE_DQUOT_EVENT(xfs_dqflush_done);
 
 DECLARE_EVENT_CLASS(xfs_loggrant_class,
-	TP_PROTO(struct log *log, struct xlog_ticket *tic),
+	TP_PROTO(struct xlog *log, struct xlog_ticket *tic),
 	TP_ARGS(log, tic),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
@@ -830,7 +830,7 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
 
 #define DEFINE_LOGGRANT_EVENT(name) \
 DEFINE_EVENT(xfs_loggrant_class, name, \
-	TP_PROTO(struct log *log, struct xlog_ticket *tic), \
+	TP_PROTO(struct xlog *log, struct xlog_ticket *tic), \
 	TP_ARGS(log, tic))
 DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
 DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
@@ -1664,7 +1664,7 @@ DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
 DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
 
 DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
-	TP_PROTO(struct log *log, struct xlog_recover *trans,
+	TP_PROTO(struct xlog *log, struct xlog_recover *trans,
 		struct xlog_recover_item *item, int pass),
 	TP_ARGS(log, trans, item, pass),
 	TP_STRUCT__entry(
@@ -1698,7 +1698,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
 
 #define DEFINE_LOG_RECOVER_ITEM(name) \
 DEFINE_EVENT(xfs_log_recover_item_class, name, \
-	TP_PROTO(struct log *log, struct xlog_recover *trans, \
+	TP_PROTO(struct xlog *log, struct xlog_recover *trans, \
 		struct xlog_recover_item *item, int pass), \
 	TP_ARGS(log, trans, item, pass))
 
@@ -1709,7 +1709,7 @@ DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail);
 DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover);
 
 DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
-	TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f),
+	TP_PROTO(struct xlog *log, struct xfs_buf_log_format *buf_f),
 	TP_ARGS(log, buf_f),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
@@ -1739,7 +1739,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
 
 #define DEFINE_LOG_RECOVER_BUF_ITEM(name) \
 DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \
-	TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \
+	TP_PROTO(struct xlog *log, struct xfs_buf_log_format *buf_f), \
 	TP_ARGS(log, buf_f))
 
 DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel);
@@ -1752,7 +1752,7 @@ DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf);
 DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf);
 
 DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
-	TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f),
+	TP_PROTO(struct xlog *log, struct xfs_inode_log_format *in_f),
 	TP_ARGS(log, in_f),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
@@ -1790,7 +1790,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
 )
 #define DEFINE_LOG_RECOVER_INO_ITEM(name) \
 DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \
-	TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \
+	TP_PROTO(struct xlog *log, struct xfs_inode_log_format *in_f), \
 	TP_ARGS(log, in_f))
 
 DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);
-- 
cgit v0.10.2


From 9a8d2fdbb47aaa1eaa136b89da5e5e6b60015c78 Mon Sep 17 00:00:00 2001
From: Mark Tinguely <tinguely@sgi.com>
Date: Thu, 14 Jun 2012 09:22:16 -0500
Subject: xfs: remove xlog_t typedef

Remove the xlog_t type definitions.

Signed-off-by: Mark Tinguely <tinguely@sgi.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index d90d4a3..7f4f937 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -45,51 +45,85 @@ xlog_commit_record(
 	struct xlog_in_core	**iclog,
 	xfs_lsn_t		*commitlsnp);
 
-STATIC xlog_t *  xlog_alloc_log(xfs_mount_t	*mp,
-				xfs_buftarg_t	*log_target,
-				xfs_daddr_t	blk_offset,
-				int		num_bblks);
+STATIC struct xlog *
+xlog_alloc_log(
+	struct xfs_mount	*mp,
+	struct xfs_buftarg	*log_target,
+	xfs_daddr_t		blk_offset,
+	int			num_bblks);
 STATIC int
 xlog_space_left(
 	struct xlog		*log,
 	atomic64_t		*head);
-STATIC int	 xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
-STATIC void	 xlog_dealloc_log(xlog_t *log);
+STATIC int
+xlog_sync(
+	struct xlog		*log,
+	struct xlog_in_core	*iclog);
+STATIC void
+xlog_dealloc_log(
+	struct xlog		*log);
 
 /* local state machine functions */
 STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int);
-STATIC void xlog_state_do_callback(xlog_t *log,int aborted, xlog_in_core_t *iclog);
-STATIC int  xlog_state_get_iclog_space(xlog_t		*log,
-				       int		len,
-				       xlog_in_core_t	**iclog,
-				       xlog_ticket_t	*ticket,
-				       int		*continued_write,
-				       int		*logoffsetp);
-STATIC int  xlog_state_release_iclog(xlog_t		*log,
-				     xlog_in_core_t	*iclog);
-STATIC void xlog_state_switch_iclogs(xlog_t		*log,
-				     xlog_in_core_t *iclog,
-				     int		eventual_size);
-STATIC void xlog_state_want_sync(xlog_t	*log, xlog_in_core_t *iclog);
+STATIC void
+xlog_state_do_callback(
+	struct xlog		*log,
+	int			aborted,
+	struct xlog_in_core	*iclog);
+STATIC int
+xlog_state_get_iclog_space(
+	struct xlog		*log,
+	int			len,
+	struct xlog_in_core	**iclog,
+	struct xlog_ticket	*ticket,
+	int			*continued_write,
+	int			*logoffsetp);
+STATIC int
+xlog_state_release_iclog(
+	struct xlog		*log,
+	struct xlog_in_core	*iclog);
+STATIC void
+xlog_state_switch_iclogs(
+	struct xlog		*log,
+	struct xlog_in_core	*iclog,
+	int			eventual_size);
+STATIC void
+xlog_state_want_sync(
+	struct xlog		*log,
+	struct xlog_in_core	*iclog);
 
 STATIC void
 xlog_grant_push_ail(
-	struct xlog	*log,
-	int		need_bytes);
-STATIC void xlog_regrant_reserve_log_space(xlog_t	 *log,
-					   xlog_ticket_t *ticket);
-STATIC void xlog_ungrant_log_space(xlog_t	 *log,
-				   xlog_ticket_t *ticket);
+	struct xlog		*log,
+	int			need_bytes);
+STATIC void
+xlog_regrant_reserve_log_space(
+	struct xlog		*log,
+	struct xlog_ticket	*ticket);
+STATIC void
+xlog_ungrant_log_space(
+	struct xlog		*log,
+	struct xlog_ticket	*ticket);
 
 #if defined(DEBUG)
-STATIC void	xlog_verify_dest_ptr(xlog_t *log, char *ptr);
+STATIC void
+xlog_verify_dest_ptr(
+	struct xlog		*log,
+	char			*ptr);
 STATIC void
 xlog_verify_grant_tail(
-	struct xlog	*log);
-STATIC void	xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
-				  int count, boolean_t syncing);
-STATIC void	xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
-				     xfs_lsn_t tail_lsn);
+	struct xlog *log);
+STATIC void
+xlog_verify_iclog(
+	struct xlog		*log,
+	struct xlog_in_core	*iclog,
+	int			count,
+	boolean_t		syncing);
+STATIC void
+xlog_verify_tail_lsn(
+	struct xlog		*log,
+	struct xlog_in_core	*iclog,
+	xfs_lsn_t		tail_lsn);
 #else
 #define xlog_verify_dest_ptr(a,b)
 #define xlog_verify_grant_tail(a)
@@ -97,7 +131,9 @@ STATIC void	xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
 #define xlog_verify_tail_lsn(a,b,c)
 #endif
 
-STATIC int	xlog_iclogs_empty(xlog_t *log);
+STATIC int
+xlog_iclogs_empty(
+	struct xlog		*log);
 
 static void
 xlog_grant_sub_space(
@@ -684,7 +720,7 @@ xfs_log_mount_finish(xfs_mount_t *mp)
 int
 xfs_log_unmount_write(xfs_mount_t *mp)
 {
-	xlog_t		 *log = mp->m_log;
+	struct xlog	 *log = mp->m_log;
 	xlog_in_core_t	 *iclog;
 #ifdef DEBUG
 	xlog_in_core_t	 *first_iclog;
@@ -893,7 +929,7 @@ int
 xfs_log_need_covered(xfs_mount_t *mp)
 {
 	int		needed = 0;
-	xlog_t		*log = mp->m_log;
+	struct xlog	*log = mp->m_log;
 
 	if (!xfs_fs_writable(mp))
 		return 0;
@@ -1024,9 +1060,9 @@ xlog_space_left(
 void
 xlog_iodone(xfs_buf_t *bp)
 {
-	xlog_in_core_t	*iclog = bp->b_fspriv;
-	xlog_t		*l = iclog->ic_log;
-	int		aborted = 0;
+	struct xlog_in_core	*iclog = bp->b_fspriv;
+	struct xlog		*l = iclog->ic_log;
+	int			aborted = 0;
 
 	/*
 	 * Race to shutdown the filesystem if we see an error.
@@ -1067,8 +1103,9 @@ xlog_iodone(xfs_buf_t *bp)
  */
 
 STATIC void
-xlog_get_iclog_buffer_size(xfs_mount_t	*mp,
-			   xlog_t	*log)
+xlog_get_iclog_buffer_size(
+	struct xfs_mount	*mp,
+	struct xlog		*log)
 {
 	int size;
 	int xhdrs;
@@ -1129,13 +1166,14 @@ done:
  * Its primary purpose is to fill in enough, so recovery can occur.  However,
  * some other stuff may be filled in too.
  */
-STATIC xlog_t *
-xlog_alloc_log(xfs_mount_t	*mp,
-	       xfs_buftarg_t	*log_target,
-	       xfs_daddr_t	blk_offset,
-	       int		num_bblks)
+STATIC struct xlog *
+xlog_alloc_log(
+	struct xfs_mount	*mp,
+	struct xfs_buftarg	*log_target,
+	xfs_daddr_t		blk_offset,
+	int			num_bblks)
 {
-	xlog_t			*log;
+	struct xlog		*log;
 	xlog_rec_header_t	*head;
 	xlog_in_core_t		**iclogp;
 	xlog_in_core_t		*iclog, *prev_iclog=NULL;
@@ -1144,7 +1182,7 @@ xlog_alloc_log(xfs_mount_t	*mp,
 	int			error = ENOMEM;
 	uint			log2_size = 0;
 
-	log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);
+	log = kmem_zalloc(sizeof(struct xlog), KM_MAYFAIL);
 	if (!log) {
 		xfs_warn(mp, "Log allocation failed: No memory!");
 		goto out;
@@ -1434,8 +1472,9 @@ xlog_bdstrat(
  */
 
 STATIC int
-xlog_sync(xlog_t		*log,
-	  xlog_in_core_t	*iclog)
+xlog_sync(
+	struct xlog		*log,
+	struct xlog_in_core	*iclog)
 {
 	xfs_caddr_t	dptr;		/* pointer to byte sized element */
 	xfs_buf_t	*bp;
@@ -1584,7 +1623,8 @@ xlog_sync(xlog_t		*log,
  * Deallocate a log structure
  */
 STATIC void
-xlog_dealloc_log(xlog_t *log)
+xlog_dealloc_log(
+	struct xlog	*log)
 {
 	xlog_in_core_t	*iclog, *next_iclog;
 	int		i;
@@ -1616,10 +1656,11 @@ xlog_dealloc_log(xlog_t *log)
  */
 /* ARGSUSED */
 static inline void
-xlog_state_finish_copy(xlog_t		*log,
-		       xlog_in_core_t	*iclog,
-		       int		record_cnt,
-		       int		copy_bytes)
+xlog_state_finish_copy(
+	struct xlog		*log,
+	struct xlog_in_core	*iclog,
+	int			record_cnt,
+	int			copy_bytes)
 {
 	spin_lock(&log->l_icloglock);
 
@@ -2142,7 +2183,8 @@ xlog_write(
  * State Change: DIRTY -> ACTIVE
  */
 STATIC void
-xlog_state_clean_log(xlog_t *log)
+xlog_state_clean_log(
+	struct xlog *log)
 {
 	xlog_in_core_t	*iclog;
 	int changed = 0;
@@ -2222,7 +2264,7 @@ xlog_state_clean_log(xlog_t *log)
 
 STATIC xfs_lsn_t
 xlog_get_lowest_lsn(
-	xlog_t		*log)
+	struct xlog	*log)
 {
 	xlog_in_core_t  *lsn_log;
 	xfs_lsn_t	lowest_lsn, lsn;
@@ -2245,9 +2287,9 @@ xlog_get_lowest_lsn(
 
 STATIC void
 xlog_state_do_callback(
-	xlog_t		*log,
-	int		aborted,
-	xlog_in_core_t	*ciclog)
+	struct xlog		*log,
+	int			aborted,
+	struct xlog_in_core	*ciclog)
 {
 	xlog_in_core_t	   *iclog;
 	xlog_in_core_t	   *first_iclog;	/* used to know when we've
@@ -2467,7 +2509,7 @@ xlog_state_done_syncing(
 	xlog_in_core_t	*iclog,
 	int		aborted)
 {
-	xlog_t		   *log = iclog->ic_log;
+	struct xlog	   *log = iclog->ic_log;
 
 	spin_lock(&log->l_icloglock);
 
@@ -2521,12 +2563,13 @@ xlog_state_done_syncing(
  *		is copied.
  */
 STATIC int
-xlog_state_get_iclog_space(xlog_t	  *log,
-			   int		  len,
-			   xlog_in_core_t **iclogp,
-			   xlog_ticket_t  *ticket,
-			   int		  *continued_write,
-			   int		  *logoffsetp)
+xlog_state_get_iclog_space(
+	struct xlog		*log,
+	int			len,
+	struct xlog_in_core	**iclogp,
+	struct xlog_ticket	*ticket,
+	int			*continued_write,
+	int			*logoffsetp)
 {
 	int		  log_offset;
 	xlog_rec_header_t *head;
@@ -2631,8 +2674,9 @@ restart:
  * move grant reservation head forward.
  */
 STATIC void
-xlog_regrant_reserve_log_space(xlog_t	     *log,
-			       xlog_ticket_t *ticket)
+xlog_regrant_reserve_log_space(
+	struct xlog		*log,
+	struct xlog_ticket	*ticket)
 {
 	trace_xfs_log_regrant_reserve_enter(log, ticket);
 
@@ -2677,8 +2721,9 @@ xlog_regrant_reserve_log_space(xlog_t	     *log,
  * in the current reservation field.
  */
 STATIC void
-xlog_ungrant_log_space(xlog_t	     *log,
-		       xlog_ticket_t *ticket)
+xlog_ungrant_log_space(
+	struct xlog		*log,
+	struct xlog_ticket	*ticket)
 {
 	int	bytes;
 
@@ -2717,8 +2762,8 @@ xlog_ungrant_log_space(xlog_t	     *log,
  */
 STATIC int
 xlog_state_release_iclog(
-	xlog_t		*log,
-	xlog_in_core_t	*iclog)
+	struct xlog		*log,
+	struct xlog_in_core	*iclog)
 {
 	int		sync = 0;	/* do we sync? */
 
@@ -2768,9 +2813,10 @@ xlog_state_release_iclog(
  * that every data block.  We have run out of space in this log record.
  */
 STATIC void
-xlog_state_switch_iclogs(xlog_t		*log,
-			 xlog_in_core_t *iclog,
-			 int		eventual_size)
+xlog_state_switch_iclogs(
+	struct xlog		*log,
+	struct xlog_in_core	*iclog,
+	int			eventual_size)
 {
 	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
 	if (!eventual_size)
@@ -3114,7 +3160,9 @@ xfs_log_force_lsn(
  * disk.
  */
 STATIC void
-xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
+xlog_state_want_sync(
+	struct xlog		*log,
+	struct xlog_in_core	*iclog)
 {
 	assert_spin_locked(&log->l_icloglock);
 
@@ -3158,7 +3206,7 @@ xfs_log_ticket_get(
 /*
  * Allocate and initialise a new log ticket.
  */
-xlog_ticket_t *
+struct xlog_ticket *
 xlog_ticket_alloc(
 	struct xlog	*log,
 	int		unit_bytes,
@@ -3346,9 +3394,10 @@ xlog_verify_grant_tail(
 
 /* check if it will fit */
 STATIC void
-xlog_verify_tail_lsn(xlog_t	    *log,
-		     xlog_in_core_t *iclog,
-		     xfs_lsn_t	    tail_lsn)
+xlog_verify_tail_lsn(
+	struct xlog		*log,
+	struct xlog_in_core	*iclog,
+	xfs_lsn_t		tail_lsn)
 {
     int blocks;
 
@@ -3385,10 +3434,11 @@ xlog_verify_tail_lsn(xlog_t	    *log,
  *	the cycle numbers agree with the current cycle number.
  */
 STATIC void
-xlog_verify_iclog(xlog_t	 *log,
-		  xlog_in_core_t *iclog,
-		  int		 count,
-		  boolean_t	 syncing)
+xlog_verify_iclog(
+	struct xlog		*log,
+	struct xlog_in_core	*iclog,
+	int			count,
+	boolean_t		syncing)
 {
 	xlog_op_header_t	*ophead;
 	xlog_in_core_t		*icptr;
@@ -3482,7 +3532,7 @@ xlog_verify_iclog(xlog_t	 *log,
  */
 STATIC int
 xlog_state_ioerror(
-	xlog_t	*log)
+	struct xlog	*log)
 {
 	xlog_in_core_t	*iclog, *ic;
 
@@ -3527,7 +3577,7 @@ xfs_log_force_umount(
 	struct xfs_mount	*mp,
 	int			logerror)
 {
-	xlog_t		*log;
+	struct xlog	*log;
 	int		retval;
 
 	log = mp->m_log;
@@ -3634,7 +3684,8 @@ xfs_log_force_umount(
 }
 
 STATIC int
-xlog_iclogs_empty(xlog_t *log)
+xlog_iclogs_empty(
+	struct xlog	*log)
 {
 	xlog_in_core_t	*iclog;
 
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 72eba22..18a801d 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -487,7 +487,7 @@ struct xlog_grant_head {
  * overflow 31 bits worth of byte offset, so using a byte number will mean
  * that round off problems won't occur when releasing partial reservations.
  */
-typedef struct xlog {
+struct xlog {
 	/* The following fields don't need locking */
 	struct xfs_mount	*l_mp;	        /* mount point */
 	struct xfs_ail		*l_ailp;	/* AIL log is working with */
@@ -540,7 +540,7 @@ typedef struct xlog {
 	char			*l_iclog_bak[XLOG_MAX_ICLOGS];
 #endif
 
-} xlog_t;
+};
 
 #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
 	((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE))
@@ -548,9 +548,17 @@ typedef struct xlog {
 #define XLOG_FORCED_SHUTDOWN(log)	((log)->l_flags & XLOG_IO_ERROR)
 
 /* common routines */
-extern int	 xlog_recover(xlog_t *log);
-extern int	 xlog_recover_finish(xlog_t *log);
-extern void	 xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
+extern int
+xlog_recover(
+	struct xlog		*log);
+extern int
+xlog_recover_finish(
+	struct xlog		*log);
+extern void
+xlog_pack_data(
+	struct xlog		*log,
+	struct xlog_in_core	*iclog,
+	int);
 
 extern kmem_zone_t *xfs_log_ticket_zone;
 struct xlog_ticket *
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index a7be98a..a76ba88 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -43,10 +43,18 @@
 #include "xfs_utils.h"
 #include "xfs_trace.h"
 
-STATIC int	xlog_find_zeroed(xlog_t *, xfs_daddr_t *);
-STATIC int	xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t);
+STATIC int
+xlog_find_zeroed(
+	struct xlog	*,
+	xfs_daddr_t	*);
+STATIC int
+xlog_clear_stale_blocks(
+	struct xlog	*,
+	xfs_lsn_t);
 #if defined(DEBUG)
-STATIC void	xlog_recover_check_summary(xlog_t *);
+STATIC void
+xlog_recover_check_summary(
+	struct xlog *);
 #else
 #define	xlog_recover_check_summary(log)
 #endif
@@ -74,7 +82,7 @@ struct xfs_buf_cancel {
 
 static inline int
 xlog_buf_bbcount_valid(
-	xlog_t		*log,
+	struct xlog	*log,
 	int		bbcount)
 {
 	return bbcount > 0 && bbcount <= log->l_logBBsize;
@@ -87,7 +95,7 @@ xlog_buf_bbcount_valid(
  */
 STATIC xfs_buf_t *
 xlog_get_bp(
-	xlog_t		*log,
+	struct xlog	*log,
 	int		nbblks)
 {
 	struct xfs_buf	*bp;
@@ -138,10 +146,10 @@ xlog_put_bp(
  */
 STATIC xfs_caddr_t
 xlog_align(
-	xlog_t		*log,
+	struct xlog	*log,
 	xfs_daddr_t	blk_no,
 	int		nbblks,
-	xfs_buf_t	*bp)
+	struct xfs_buf	*bp)
 {
 	xfs_daddr_t	offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
 
@@ -155,10 +163,10 @@ xlog_align(
  */
 STATIC int
 xlog_bread_noalign(
-	xlog_t		*log,
+	struct xlog	*log,
 	xfs_daddr_t	blk_no,
 	int		nbblks,
-	xfs_buf_t	*bp)
+	struct xfs_buf	*bp)
 {
 	int		error;
 
@@ -189,10 +197,10 @@ xlog_bread_noalign(
 
 STATIC int
 xlog_bread(
-	xlog_t		*log,
+	struct xlog	*log,
 	xfs_daddr_t	blk_no,
 	int		nbblks,
-	xfs_buf_t	*bp,
+	struct xfs_buf	*bp,
 	xfs_caddr_t	*offset)
 {
 	int		error;
@@ -211,10 +219,10 @@ xlog_bread(
  */
 STATIC int
 xlog_bread_offset(
-	xlog_t		*log,
+	struct xlog	*log,
 	xfs_daddr_t	blk_no,		/* block to read from */
 	int		nbblks,		/* blocks to read */
-	xfs_buf_t	*bp,
+	struct xfs_buf	*bp,
 	xfs_caddr_t	offset)
 {
 	xfs_caddr_t	orig_offset = bp->b_addr;
@@ -241,10 +249,10 @@ xlog_bread_offset(
  */
 STATIC int
 xlog_bwrite(
-	xlog_t		*log,
+	struct xlog	*log,
 	xfs_daddr_t	blk_no,
 	int		nbblks,
-	xfs_buf_t	*bp)
+	struct xfs_buf	*bp)
 {
 	int		error;
 
@@ -378,8 +386,8 @@ xlog_recover_iodone(
  */
 STATIC int
 xlog_find_cycle_start(
-	xlog_t		*log,
-	xfs_buf_t	*bp,
+	struct xlog	*log,
+	struct xfs_buf	*bp,
 	xfs_daddr_t	first_blk,
 	xfs_daddr_t	*last_blk,
 	uint		cycle)
@@ -421,7 +429,7 @@ xlog_find_cycle_start(
  */
 STATIC int
 xlog_find_verify_cycle(
-	xlog_t		*log,
+	struct xlog	*log,
 	xfs_daddr_t	start_blk,
 	int		nbblks,
 	uint		stop_on_cycle_no,
@@ -490,7 +498,7 @@ out:
  */
 STATIC int
 xlog_find_verify_log_record(
-	xlog_t			*log,
+	struct xlog		*log,
 	xfs_daddr_t		start_blk,
 	xfs_daddr_t		*last_blk,
 	int			extra_bblks)
@@ -600,7 +608,7 @@ out:
  */
 STATIC int
 xlog_find_head(
-	xlog_t 		*log,
+	struct xlog	*log,
 	xfs_daddr_t	*return_head_blk)
 {
 	xfs_buf_t	*bp;
@@ -871,7 +879,7 @@ validate_head:
  */
 STATIC int
 xlog_find_tail(
-	xlog_t			*log,
+	struct xlog		*log,
 	xfs_daddr_t		*head_blk,
 	xfs_daddr_t		*tail_blk)
 {
@@ -1080,7 +1088,7 @@ done:
  */
 STATIC int
 xlog_find_zeroed(
-	xlog_t		*log,
+	struct xlog	*log,
 	xfs_daddr_t	*blk_no)
 {
 	xfs_buf_t	*bp;
@@ -1183,7 +1191,7 @@ bp_err:
  */
 STATIC void
 xlog_add_record(
-	xlog_t			*log,
+	struct xlog		*log,
 	xfs_caddr_t		buf,
 	int			cycle,
 	int			block,
@@ -1205,7 +1213,7 @@ xlog_add_record(
 
 STATIC int
 xlog_write_log_records(
-	xlog_t		*log,
+	struct xlog	*log,
 	int		cycle,
 	int		start_block,
 	int		blocks,
@@ -1305,7 +1313,7 @@ xlog_write_log_records(
  */
 STATIC int
 xlog_clear_stale_blocks(
-	xlog_t		*log,
+	struct xlog	*log,
 	xfs_lsn_t	tail_lsn)
 {
 	int		tail_cycle, head_cycle;
@@ -2050,11 +2058,11 @@ xfs_qm_dqcheck(
  */
 STATIC void
 xlog_recover_do_dquot_buffer(
-	xfs_mount_t		*mp,
-	xlog_t			*log,
-	xlog_recover_item_t	*item,
-	xfs_buf_t		*bp,
-	xfs_buf_log_format_t	*buf_f)
+	struct xfs_mount		*mp,
+	struct xlog			*log,
+	struct xlog_recover_item	*item,
+	struct xfs_buf			*bp,
+	struct xfs_buf_log_format	*buf_f)
 {
 	uint			type;
 
@@ -2108,9 +2116,9 @@ xlog_recover_do_dquot_buffer(
  */
 STATIC int
 xlog_recover_buffer_pass2(
-	xlog_t			*log,
-	struct list_head	*buffer_list,
-	xlog_recover_item_t	*item)
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item)
 {
 	xfs_buf_log_format_t	*buf_f = item->ri_buf[0].i_addr;
 	xfs_mount_t		*mp = log->l_mp;
@@ -2189,9 +2197,9 @@ xlog_recover_buffer_pass2(
 
 STATIC int
 xlog_recover_inode_pass2(
-	xlog_t			*log,
-	struct list_head	*buffer_list,
-	xlog_recover_item_t	*item)
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item)
 {
 	xfs_inode_log_format_t	*in_f;
 	xfs_mount_t		*mp = log->l_mp;
@@ -2452,14 +2460,14 @@ error:
 }
 
 /*
- * Recover QUOTAOFF records. We simply make a note of it in the xlog_t
+ * Recover QUOTAOFF records. We simply make a note of it in the xlog
  * structure, so that we know not to do any dquot item or dquot buffer recovery,
  * of that type.
  */
 STATIC int
 xlog_recover_quotaoff_pass1(
-	xlog_t			*log,
-	xlog_recover_item_t	*item)
+	struct xlog			*log,
+	struct xlog_recover_item	*item)
 {
 	xfs_qoff_logformat_t	*qoff_f = item->ri_buf[0].i_addr;
 	ASSERT(qoff_f);
@@ -2483,9 +2491,9 @@ xlog_recover_quotaoff_pass1(
  */
 STATIC int
 xlog_recover_dquot_pass2(
-	xlog_t			*log,
-	struct list_head	*buffer_list,
-	xlog_recover_item_t	*item)
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item)
 {
 	xfs_mount_t		*mp = log->l_mp;
 	xfs_buf_t		*bp;
@@ -2578,9 +2586,9 @@ xlog_recover_dquot_pass2(
  */
 STATIC int
 xlog_recover_efi_pass2(
-	xlog_t			*log,
-	xlog_recover_item_t	*item,
-	xfs_lsn_t		lsn)
+	struct xlog			*log,
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			lsn)
 {
 	int			error;
 	xfs_mount_t		*mp = log->l_mp;
@@ -2616,8 +2624,8 @@ xlog_recover_efi_pass2(
  */
 STATIC int
 xlog_recover_efd_pass2(
-	xlog_t			*log,
-	xlog_recover_item_t	*item)
+	struct xlog			*log,
+	struct xlog_recover_item	*item)
 {
 	xfs_efd_log_format_t	*efd_formatp;
 	xfs_efi_log_item_t	*efip = NULL;
@@ -2812,9 +2820,9 @@ xlog_recover_unmount_trans(
  */
 STATIC int
 xlog_recover_process_data(
-	xlog_t			*log,
+	struct xlog		*log,
 	struct hlist_head	rhash[],
-	xlog_rec_header_t	*rhead,
+	struct xlog_rec_header	*rhead,
 	xfs_caddr_t		dp,
 	int			pass)
 {
@@ -2986,7 +2994,7 @@ abort_error:
  */
 STATIC int
 xlog_recover_process_efis(
-	xlog_t			*log)
+	struct xlog	*log)
 {
 	xfs_log_item_t		*lip;
 	xfs_efi_log_item_t	*efip;
@@ -3147,7 +3155,7 @@ xlog_recover_process_one_iunlink(
  */
 STATIC void
 xlog_recover_process_iunlinks(
-	xlog_t		*log)
+	struct xlog	*log)
 {
 	xfs_mount_t	*mp;
 	xfs_agnumber_t	agno;
@@ -3209,9 +3217,9 @@ xlog_recover_process_iunlinks(
 #ifdef DEBUG
 STATIC void
 xlog_pack_data_checksum(
-	xlog_t		*log,
-	xlog_in_core_t	*iclog,
-	int		size)
+	struct xlog		*log,
+	struct xlog_in_core	*iclog,
+	int			size)
 {
 	int		i;
 	__be32		*up;
@@ -3234,8 +3242,8 @@ xlog_pack_data_checksum(
  */
 void
 xlog_pack_data(
-	xlog_t			*log,
-	xlog_in_core_t		*iclog,
+	struct xlog		*log,
+	struct xlog_in_core	*iclog,
 	int			roundoff)
 {
 	int			i, j, k;
@@ -3274,9 +3282,9 @@ xlog_pack_data(
 
 STATIC void
 xlog_unpack_data(
-	xlog_rec_header_t	*rhead,
+	struct xlog_rec_header	*rhead,
 	xfs_caddr_t		dp,
-	xlog_t			*log)
+	struct xlog		*log)
 {
 	int			i, j, k;
 
@@ -3299,8 +3307,8 @@ xlog_unpack_data(
 
 STATIC int
 xlog_valid_rec_header(
-	xlog_t			*log,
-	xlog_rec_header_t	*rhead,
+	struct xlog		*log,
+	struct xlog_rec_header	*rhead,
 	xfs_daddr_t		blkno)
 {
 	int			hlen;
@@ -3343,7 +3351,7 @@ xlog_valid_rec_header(
  */
 STATIC int
 xlog_do_recovery_pass(
-	xlog_t			*log,
+	struct xlog		*log,
 	xfs_daddr_t		head_blk,
 	xfs_daddr_t		tail_blk,
 	int			pass)
@@ -3595,7 +3603,7 @@ xlog_do_recovery_pass(
  */
 STATIC int
 xlog_do_log_recovery(
-	xlog_t		*log,
+	struct xlog	*log,
 	xfs_daddr_t	head_blk,
 	xfs_daddr_t	tail_blk)
 {
@@ -3646,7 +3654,7 @@ xlog_do_log_recovery(
  */
 STATIC int
 xlog_do_recover(
-	xlog_t		*log,
+	struct xlog	*log,
 	xfs_daddr_t	head_blk,
 	xfs_daddr_t	tail_blk)
 {
@@ -3721,7 +3729,7 @@ xlog_do_recover(
  */
 int
 xlog_recover(
-	xlog_t		*log)
+	struct xlog	*log)
 {
 	xfs_daddr_t	head_blk, tail_blk;
 	int		error;
@@ -3767,7 +3775,7 @@ xlog_recover(
  */
 int
 xlog_recover_finish(
-	xlog_t		*log)
+	struct xlog	*log)
 {
 	/*
 	 * Now we're ready to do the transactions needed for the
@@ -3814,7 +3822,7 @@ xlog_recover_finish(
  */
 void
 xlog_recover_check_summary(
-	xlog_t		*log)
+	struct xlog	*log)
 {
 	xfs_mount_t	*mp;
 	xfs_agf_t	*agfp;
-- 
cgit v0.10.2


From 77c1a08fc9ece4cb130b9fd279738e799f0c2864 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Fri, 22 Jun 2012 18:50:07 +1000
Subject: xfs: struct xfs_buf_log_format isn't variable sized.

The struct xfs_buf_log_format wants to think the dirty bitmap is
variable sized.  In fact, it is variable size on disk simply due to
the way we map it from the in-memory structure, but we still just
use a fixed size memory allocation for the in-memory structure.

Hence it makes no sense to define it as a variable sized
structure when we already know its maximum size, and we always
allocate it as such. Simplify the structure by making the dirty
bitmap a fixed sized array and just using the size of the structure
for the allocation size.

This will make it much simpler to allocate and manipulate an array
of format structures for discontiguous buffer support.

The previous struct xfs_buf_log_item size according to
/proc/slabinfo was 224 bytes. pahole doesn't give the same size
because of the variable size definition. With this modification,
pahole reports the same as /proc/slabinfo:

	/* size: 224, cachelines: 4, members: 6 */

Because the xfs_buf_log_item size is now determined by the maximum
supported block size, we introduce a dependency on xfs_alloc_btree.h.
Avoid this dependency by moving the defines for the maximum block
sizes supported to xfs_types.h with all the other max/min type
defines to avoid any new dependencies.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h
index a6caa00..359fb86 100644
--- a/fs/xfs/xfs_alloc_btree.h
+++ b/fs/xfs/xfs_alloc_btree.h
@@ -51,20 +51,6 @@ typedef struct xfs_alloc_rec_incore {
 typedef __be32 xfs_alloc_ptr_t;
 
 /*
- * Minimum and maximum blocksize and sectorsize.
- * The blocksize upper limit is pretty much arbitrary.
- * The sectorsize upper limit is due to sizeof(sb_sectsize).
- */
-#define XFS_MIN_BLOCKSIZE_LOG	9	/* i.e. 512 bytes */
-#define XFS_MAX_BLOCKSIZE_LOG	16	/* i.e. 65536 bytes */
-#define XFS_MIN_BLOCKSIZE	(1 << XFS_MIN_BLOCKSIZE_LOG)
-#define XFS_MAX_BLOCKSIZE	(1 << XFS_MAX_BLOCKSIZE_LOG)
-#define XFS_MIN_SECTORSIZE_LOG	9	/* i.e. 512 bytes */
-#define XFS_MAX_SECTORSIZE_LOG	15	/* i.e. 32768 bytes */
-#define XFS_MIN_SECTORSIZE	(1 << XFS_MIN_SECTORSIZE_LOG)
-#define XFS_MAX_SECTORSIZE	(1 << XFS_MAX_SECTORSIZE_LOG)
-
-/*
  * Block numbers in the AG:
  * SB is sector 0, AGF is sector 1, AGI is sector 2, AGFL is sector 3.
  */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 45df2b8..52cd8f8 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -240,15 +240,13 @@ xfs_buf_item_format(
 	       (bip->bli_flags & XFS_BLI_STALE));
 
 	/*
-	 * The size of the base structure is the size of the
-	 * declared structure plus the space for the extra words
-	 * of the bitmap.  We subtract one from the map size, because
-	 * the first element of the bitmap is accounted for in the
-	 * size of the base structure.
+	 * Base size is the actual size of the ondisk structure - it reflects
+	 * the actual size of the dirty bitmap rather than the size of the in
+	 * memory structure.
 	 */
-	base_size =
-		(uint)(sizeof(xfs_buf_log_format_t) +
-		       ((bip->bli_format.blf_map_size - 1) * sizeof(uint)));
+	base_size = offsetof(struct xfs_buf_log_format, blf_data_map) +
+			(bip->bli_format.blf_map_size *
+				sizeof(bip->bli_format.blf_data_map[0]));
 	vecp->i_addr = &bip->bli_format;
 	vecp->i_len = base_size;
 	vecp->i_type = XLOG_REG_TYPE_BFORMAT;
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index b6ecd20..ff26867 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -21,23 +21,6 @@
 extern kmem_zone_t	*xfs_buf_item_zone;
 
 /*
- * This is the structure used to lay out a buf log item in the
- * log.  The data map describes which 128 byte chunks of the buffer
- * have been logged.
- * For 6.2 and beyond, this is XFS_LI_BUF.  We use this to log everything.
- */
-typedef struct xfs_buf_log_format {
-	unsigned short	blf_type;	/* buf log item type indicator */
-	unsigned short	blf_size;	/* size of this item */
-	ushort		blf_flags;	/* misc state */
-	ushort		blf_len;	/* number of blocks in this buf */
-	__int64_t	blf_blkno;	/* starting blkno of this buf */
-	unsigned int	blf_map_size;	/* size of data bitmap in words */
-	unsigned int	blf_data_map[1];/* variable size bitmap of */
-					/*   regions of buffer in this item */
-} xfs_buf_log_format_t;
-
-/*
  * This flag indicates that the buffer contains on disk inodes
  * and requires special recovery handling.
  */
@@ -61,6 +44,23 @@ typedef struct xfs_buf_log_format {
 #define	NBWORD			(NBBY * sizeof(unsigned int))
 
 /*
+ * This is the structure used to lay out a buf log item in the
+ * log.  The data map describes which 128 byte chunks of the buffer
+ * have been logged.
+ */
+#define XFS_BLF_DATAMAP_SIZE	((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD)
+
+typedef struct xfs_buf_log_format {
+	unsigned short	blf_type;	/* buf log item type indicator */
+	unsigned short	blf_size;	/* size of this item */
+	ushort		blf_flags;	/* misc state */
+	ushort		blf_len;	/* number of blocks in this buf */
+	__int64_t	blf_blkno;	/* starting blkno of this buf */
+	unsigned int	blf_map_size;	/* used size of data bitmap in words */
+	unsigned int	blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */
+} xfs_buf_log_format_t;
+
+/*
  * buf log item flags
  */
 #define	XFS_BLI_HOLD		0x01
@@ -102,7 +102,7 @@ typedef struct xfs_buf_log_item {
 	char			*bli_orig;	/* original buffer copy */
 	char			*bli_logged;	/* bytes logged (bitmap) */
 #endif
-	xfs_buf_log_format_t	bli_format;	/* in-log header */
+	struct xfs_buf_log_format bli_format;	/* embedded in-log header */
 } xfs_buf_log_item_t;
 
 void	xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 0d9de41..425f6e9 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1514,9 +1514,8 @@ xfs_init_zones(void)
 	 * size possible under XFS.  This wastes a little bit of memory,
 	 * but it is much faster.
 	 */
-	xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
-				(((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) /
-				  NBWORD) * sizeof(int))), "xfs_buf_item");
+	xfs_buf_item_zone = kmem_zone_init(sizeof(struct xfs_buf_log_item),
+					   "xfs_buf_item");
 	if (!xfs_buf_item_zone)
 		goto out_destroy_log_item_desc_zone;
 
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 398cf68..7a41874 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -133,6 +133,20 @@ typedef __uint64_t	xfs_filblks_t;	/* number of blocks in a file */
 #define	MAXAEXTNUM	((xfs_aextnum_t)0x7fff)		/* signed short */
 
 /*
+ * Minimum and maximum blocksize and sectorsize.
+ * The blocksize upper limit is pretty much arbitrary.
+ * The sectorsize upper limit is due to sizeof(sb_sectsize).
+ */
+#define XFS_MIN_BLOCKSIZE_LOG	9	/* i.e. 512 bytes */
+#define XFS_MAX_BLOCKSIZE_LOG	16	/* i.e. 65536 bytes */
+#define XFS_MIN_BLOCKSIZE	(1 << XFS_MIN_BLOCKSIZE_LOG)
+#define XFS_MAX_BLOCKSIZE	(1 << XFS_MAX_BLOCKSIZE_LOG)
+#define XFS_MIN_SECTORSIZE_LOG	9	/* i.e. 512 bytes */
+#define XFS_MAX_SECTORSIZE_LOG	15	/* i.e. 32768 bytes */
+#define XFS_MIN_SECTORSIZE	(1 << XFS_MIN_SECTORSIZE_LOG)
+#define XFS_MAX_SECTORSIZE	(1 << XFS_MAX_SECTORSIZE_LOG)
+
+/*
  * Min numbers of data/attr fork btree root pointers.
  */
 #define MINDBTPTRS	3
-- 
cgit v0.10.2


From cbb7baab285a540f173ef1ec3d5bcf9d0ad29d16 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Fri, 22 Jun 2012 18:50:08 +1000
Subject: xfs: separate buffer indexing from block map

To support discontiguous buffers in the buffer cache, we need to
separate the cache index variables from the I/O map. While this is
currently a 1:1 mapping, discontiguous buffer support will break
this relationship.

However, for caching purposes, we can still treat them the same as a
contiguous buffer - the block number of the first block and the
length of the buffer - as that is still a unique representation.
Also, the only way we will ever access the discontiguous regions of
buffers is via building the complete buffer in the first place, so
using the initial block number and entire buffer length is a sane
way to index the buffers.

Add a block mapping vector construct to the xfs_buf and use it in
the places where we are doing IO instead of the current
b_bn/b_length variables.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index a4beb42..a843873b 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -202,6 +202,8 @@ xfs_buf_alloc(
 	bp->b_io_length = numblks;
 	bp->b_flags = flags;
 	bp->b_bn = blkno;
+	bp->b_map.bm_bn = blkno;
+	bp->b_map.bm_len = numblks;
 	atomic_set(&bp->b_pin_count, 0);
 	init_waitqueue_head(&bp->b_waiters);
 
@@ -327,8 +329,9 @@ xfs_buf_allocate_memory(
 	}
 
 use_alloc_page:
-	start = BBTOB(bp->b_bn) >> PAGE_SHIFT;
-	end = (BBTOB(bp->b_bn + bp->b_length) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	start = BBTOB(bp->b_map.bm_bn) >> PAGE_SHIFT;
+	end = (BBTOB(bp->b_map.bm_bn + bp->b_length) + PAGE_SIZE - 1)
+								>> PAGE_SHIFT;
 	page_count = end - start;
 	error = _xfs_buf_get_pages(bp, page_count, flags);
 	if (unlikely(error))
@@ -560,8 +563,6 @@ xfs_buf_get(
 	if (bp != new_bp)
 		xfs_buf_free(new_bp);
 
-	bp->b_io_length = bp->b_length;
-
 found:
 	if (!bp->b_addr) {
 		error = _xfs_buf_map_pages(bp, flags);
@@ -584,7 +585,7 @@ _xfs_buf_read(
 	xfs_buf_flags_t		flags)
 {
 	ASSERT(!(flags & XBF_WRITE));
-	ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
+	ASSERT(bp->b_map.bm_bn != XFS_BUF_DADDR_NULL);
 
 	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
 	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
@@ -665,8 +666,8 @@ xfs_buf_read_uncached(
 		return NULL;
 
 	/* set up the buffer for a read IO */
-	XFS_BUF_SET_ADDR(bp, daddr);
-	XFS_BUF_READ(bp);
+	bp->b_map.bm_bn = daddr;
+	bp->b_flags |= XBF_READ;
 
 	xfsbdstrat(target->bt_mount, bp);
 	error = xfs_buf_iowait(bp);
@@ -695,6 +696,8 @@ xfs_buf_set_empty(
 	bp->b_length = numblks;
 	bp->b_io_length = numblks;
 	bp->b_bn = XFS_BUF_DADDR_NULL;
+	bp->b_map.bm_bn = XFS_BUF_DADDR_NULL;
+	bp->b_map.bm_len = bp->b_length;
 }
 
 static inline struct page *
@@ -1159,7 +1162,7 @@ _xfs_buf_ioapply(
 	struct bio		*bio;
 	int			offset = bp->b_offset;
 	int			size = BBTOB(bp->b_io_length);
-	sector_t		sector = bp->b_bn;
+	sector_t		sector = bp->b_map.bm_bn;
 
 	total_nr_pages = bp->b_page_count;
 	map_i = 0;
@@ -1564,7 +1567,7 @@ xfs_buf_cmp(
 	struct xfs_buf	*bp = container_of(b, struct xfs_buf, b_list);
 	xfs_daddr_t		diff;
 
-	diff = ap->b_bn - bp->b_bn;
+	diff = ap->b_map.bm_bn - bp->b_map.bm_bn;
 	if (diff < 0)
 		return -1;
 	if (diff > 0)
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 7f1d139..c9c2ba9 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -58,6 +58,7 @@ typedef enum {
 #define _XBF_PAGES	(1 << 20)/* backed by refcounted pages */
 #define _XBF_KMEM	(1 << 21)/* backed by heap memory */
 #define _XBF_DELWRI_Q	(1 << 22)/* buffer on a delwri queue */
+#define _XBF_COMPOUND	(1 << 23)/* compound buffer */
 
 typedef unsigned int xfs_buf_flags_t;
 
@@ -75,7 +76,8 @@ typedef unsigned int xfs_buf_flags_t;
 	{ XBF_UNMAPPED,		"UNMAPPED" },	/* ditto */\
 	{ _XBF_PAGES,		"PAGES" }, \
 	{ _XBF_KMEM,		"KMEM" }, \
-	{ _XBF_DELWRI_Q,	"DELWRI_Q" }
+	{ _XBF_DELWRI_Q,	"DELWRI_Q" }, \
+	{ _XBF_COMPOUND,	"COMPOUND" }
 
 typedef struct xfs_buftarg {
 	dev_t			bt_dev;
@@ -98,6 +100,11 @@ typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
 
 #define XB_PAGES	2
 
+struct xfs_buf_map {
+	xfs_daddr_t		bm_bn;	/* block number for I/O */
+	int			bm_len;	/* size of I/O */
+};
+
 typedef struct xfs_buf {
 	/*
 	 * first cacheline holds all the fields needed for an uncontended cache
@@ -107,7 +114,7 @@ typedef struct xfs_buf {
 	 * fast-path on locking.
 	 */
 	struct rb_node		b_rbnode;	/* rbtree node */
-	xfs_daddr_t		b_bn;		/* block number for I/O */
+	xfs_daddr_t		b_bn;		/* block number of buffer */
 	int			b_length;	/* size of buffer in BBs */
 	atomic_t		b_hold;		/* reference count */
 	atomic_t		b_lru_ref;	/* lru reclaim ref count */
@@ -127,12 +134,14 @@ typedef struct xfs_buf {
 	struct xfs_trans	*b_transp;
 	struct page		**b_pages;	/* array of page pointers */
 	struct page		*b_page_array[XB_PAGES]; /* inline pages */
+	struct xfs_buf_map	b_map;		/* compound buffer map */
 	int			b_io_length;	/* IO size in BBs */
 	atomic_t		b_pin_count;	/* pin count */
 	atomic_t		b_io_remaining;	/* #outstanding I/O requests */
 	unsigned int		b_page_count;	/* size of page array */
 	unsigned int		b_offset;	/* page offset in first page */
 	unsigned short		b_error;	/* error code on I/O */
+
 #ifdef XFS_BUF_LOCK_TRACKING
 	int			b_last_holder;
 #endif
@@ -233,8 +242,18 @@ void xfs_buf_stale(struct xfs_buf *bp);
 #define XFS_BUF_UNWRITE(bp)	((bp)->b_flags &= ~XBF_WRITE)
 #define XFS_BUF_ISWRITE(bp)	((bp)->b_flags & XBF_WRITE)
 
-#define XFS_BUF_ADDR(bp)		((bp)->b_bn)
-#define XFS_BUF_SET_ADDR(bp, bno)	((bp)->b_bn = (xfs_daddr_t)(bno))
+/*
+ * These macros use the IO block map rather than b_bn. b_bn is now really
+ * just for the buffer cache index for cached buffers. As IO does not use b_bn
+ * anymore, uncached buffers do not use b_bn at all and hence must modify the IO
+ * map directly. Uncached buffers are not allowed to be discontiguous, so this
+ * is safe to do.
+ *
+ * In future, uncached buffers will pass the block number directly to the io
+ * request function and hence these macros will go away at that point.
+ */
+#define XFS_BUF_ADDR(bp)		((bp)->b_map.bm_bn)
+#define XFS_BUF_SET_ADDR(bp, bno)	((bp)->b_map.bm_bn = (xfs_daddr_t)(bno))
 
 static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
 {
-- 
cgit v0.10.2


From 3e85c868a697805a3d4c7800a6bacdfc81d15cdf Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Fri, 22 Jun 2012 18:50:09 +1000
Subject: xfs: convert internal buffer functions to pass maps

While the external interface currently uses separate blockno/length
variables, we need to move internal interfaces to passing and
parsing vector maps. This will then allow us to add external
interfaces to support discontiguous buffer maps as the internal code
will already support them.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index a843873b..82bb812 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -164,14 +164,49 @@ xfs_buf_stale(
 	ASSERT(atomic_read(&bp->b_hold) >= 1);
 }
 
+static int
+xfs_buf_get_maps(
+	struct xfs_buf		*bp,
+	int			map_count)
+{
+	ASSERT(bp->b_maps == NULL);
+	bp->b_map_count = map_count;
+
+	if (map_count == 1) {
+		bp->b_maps = &bp->b_map;
+		return 0;
+	}
+
+	bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
+				KM_NOFS);
+	if (!bp->b_maps)
+		return ENOMEM;
+	return 0;
+}
+
+/*
+ *	Frees b_pages if it was allocated.
+ */
+static void
+xfs_buf_free_maps(
+	struct xfs_buf	*bp)
+{
+	if (bp->b_maps != &bp->b_map) {
+		kmem_free(bp->b_maps);
+		bp->b_maps = NULL;
+	}
+}
+
 struct xfs_buf *
-xfs_buf_alloc(
+_xfs_buf_alloc(
 	struct xfs_buftarg	*target,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
+	struct xfs_buf_map	*map,
+	int			nmaps,
 	xfs_buf_flags_t		flags)
 {
 	struct xfs_buf		*bp;
+	int			error;
+	int			i;
 
 	bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS);
 	if (unlikely(!bp))
@@ -192,18 +227,28 @@ xfs_buf_alloc(
 	sema_init(&bp->b_sema, 0); /* held, no waiters */
 	XB_SET_OWNER(bp);
 	bp->b_target = target;
+	bp->b_flags = flags;
 
 	/*
 	 * Set length and io_length to the same value initially.
 	 * I/O routines should use io_length, which will be the same in
 	 * most cases but may be reset (e.g. XFS recovery).
 	 */
-	bp->b_length = numblks;
-	bp->b_io_length = numblks;
-	bp->b_flags = flags;
-	bp->b_bn = blkno;
-	bp->b_map.bm_bn = blkno;
-	bp->b_map.bm_len = numblks;
+	error = xfs_buf_get_maps(bp, nmaps);
+	if (error)  {
+		kmem_zone_free(xfs_buf_zone, bp);
+		return NULL;
+	}
+
+	bp->b_bn = map[0].bm_bn;
+	bp->b_length = 0;
+	for (i = 0; i < nmaps; i++) {
+		bp->b_maps[i].bm_bn = map[i].bm_bn;
+		bp->b_maps[i].bm_len = map[i].bm_len;
+		bp->b_length += map[i].bm_len;
+	}
+	bp->b_io_length = bp->b_length;
+
 	atomic_set(&bp->b_pin_count, 0);
 	init_waitqueue_head(&bp->b_waiters);
 
@@ -282,6 +327,7 @@ xfs_buf_free(
 	} else if (bp->b_flags & _XBF_KMEM)
 		kmem_free(bp->b_addr);
 	_xfs_buf_free_pages(bp);
+	xfs_buf_free_maps(bp);
 	kmem_zone_free(xfs_buf_zone, bp);
 }
 
@@ -428,8 +474,8 @@ _xfs_buf_map_pages(
 xfs_buf_t *
 _xfs_buf_find(
 	struct xfs_buftarg	*btp,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
+	struct xfs_buf_map	*map,
+	int			nmaps,
 	xfs_buf_flags_t		flags,
 	xfs_buf_t		*new_bp)
 {
@@ -438,7 +484,12 @@ _xfs_buf_find(
 	struct rb_node		**rbp;
 	struct rb_node		*parent;
 	xfs_buf_t		*bp;
+	xfs_daddr_t		blkno = map[0].bm_bn;
+	int			numblks = 0;
+	int			i;
 
+	for (i = 0; i < nmaps; i++)
+		numblks += map[i].bm_len;
 	numbytes = BBTOB(numblks);
 
 	/* Check for IOs smaller than the sector size / not sector aligned */
@@ -539,22 +590,23 @@ xfs_buf_get(
 	struct xfs_buf		*bp;
 	struct xfs_buf		*new_bp;
 	int			error = 0;
+	DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
 
-	bp = _xfs_buf_find(target, blkno, numblks, flags, NULL);
+	bp = _xfs_buf_find(target, &map, 1, flags, NULL);
 	if (likely(bp))
 		goto found;
 
-	new_bp = xfs_buf_alloc(target, blkno, numblks, flags);
+	new_bp = _xfs_buf_alloc(target, &map, 1, flags);
 	if (unlikely(!new_bp))
 		return NULL;
 
 	error = xfs_buf_allocate_memory(new_bp, flags);
 	if (error) {
-		kmem_zone_free(xfs_buf_zone, new_bp);
+		xfs_buf_free(new_bp);
 		return NULL;
 	}
 
-	bp = _xfs_buf_find(target, blkno, numblks, flags, new_bp);
+	bp = _xfs_buf_find(target, &map, 1, flags, new_bp);
 	if (!bp) {
 		xfs_buf_free(new_bp);
 		return NULL;
@@ -666,7 +718,9 @@ xfs_buf_read_uncached(
 		return NULL;
 
 	/* set up the buffer for a read IO */
-	bp->b_map.bm_bn = daddr;
+	ASSERT(bp->b_map_count == 1);
+	bp->b_bn = daddr;
+	bp->b_maps[0].bm_bn = daddr;
 	bp->b_flags |= XBF_READ;
 
 	xfsbdstrat(target->bt_mount, bp);
@@ -695,9 +749,11 @@ xfs_buf_set_empty(
 	bp->b_addr = NULL;
 	bp->b_length = numblks;
 	bp->b_io_length = numblks;
+
+	ASSERT(bp->b_map_count == 1);
 	bp->b_bn = XFS_BUF_DADDR_NULL;
-	bp->b_map.bm_bn = XFS_BUF_DADDR_NULL;
-	bp->b_map.bm_len = bp->b_length;
+	bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL;
+	bp->b_maps[0].bm_len = bp->b_length;
 }
 
 static inline struct page *
@@ -761,9 +817,10 @@ xfs_buf_get_uncached(
 {
 	unsigned long		page_count;
 	int			error, i;
-	xfs_buf_t		*bp;
+	struct xfs_buf		*bp;
+	DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
 
-	bp = xfs_buf_alloc(target, XFS_BUF_DADDR_NULL, numblks, 0);
+	bp = _xfs_buf_alloc(target, &map, 1, 0);
 	if (unlikely(bp == NULL))
 		goto fail;
 
@@ -794,6 +851,7 @@ xfs_buf_get_uncached(
 		__free_page(bp->b_pages[i]);
 	_xfs_buf_free_pages(bp);
  fail_free_buf:
+	xfs_buf_free_maps(bp);
 	kmem_zone_free(xfs_buf_zone, bp);
  fail:
 	return NULL;
@@ -1154,36 +1212,39 @@ xfs_buf_bio_end_io(
 	bio_put(bio);
 }
 
-STATIC void
-_xfs_buf_ioapply(
-	xfs_buf_t		*bp)
+static void
+xfs_buf_ioapply_map(
+	struct xfs_buf	*bp,
+	int		map,
+	int		*buf_offset,
+	int		*count,
+	int		rw)
 {
-	int			rw, map_i, total_nr_pages, nr_pages;
-	struct bio		*bio;
-	int			offset = bp->b_offset;
-	int			size = BBTOB(bp->b_io_length);
-	sector_t		sector = bp->b_map.bm_bn;
+	int		page_index;
+	int		total_nr_pages = bp->b_page_count;
+	int		nr_pages;
+	struct bio	*bio;
+	sector_t	sector =  bp->b_maps[map].bm_bn;
+	int		size;
+	int		offset;
 
 	total_nr_pages = bp->b_page_count;
-	map_i = 0;
 
-	if (bp->b_flags & XBF_WRITE) {
-		if (bp->b_flags & XBF_SYNCIO)
-			rw = WRITE_SYNC;
-		else
-			rw = WRITE;
-		if (bp->b_flags & XBF_FUA)
-			rw |= REQ_FUA;
-		if (bp->b_flags & XBF_FLUSH)
-			rw |= REQ_FLUSH;
-	} else if (bp->b_flags & XBF_READ_AHEAD) {
-		rw = READA;
-	} else {
-		rw = READ;
+	/* skip the pages in the buffer before the start offset */
+	page_index = 0;
+	offset = *buf_offset;
+	while (offset >= PAGE_SIZE) {
+		page_index++;
+		offset -= PAGE_SIZE;
 	}
 
-	/* we only use the buffer cache for meta-data */
-	rw |= REQ_META;
+	/*
+	 * Limit the IO size to the length of the current vector, and update the
+	 * remaining IO count for the next time around.
+	 */
+	size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
+	*count -= size;
+	*buf_offset += size;
 
 next_chunk:
 	atomic_inc(&bp->b_io_remaining);
@@ -1198,13 +1259,14 @@ next_chunk:
 	bio->bi_private = bp;
 
 
-	for (; size && nr_pages; nr_pages--, map_i++) {
+	for (; size && nr_pages; nr_pages--, page_index++) {
 		int	rbytes, nbytes = PAGE_SIZE - offset;
 
 		if (nbytes > size)
 			nbytes = size;
 
-		rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
+		rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
+				      offset);
 		if (rbytes < nbytes)
 			break;
 
@@ -1226,6 +1288,54 @@ next_chunk:
 		xfs_buf_ioerror(bp, EIO);
 		bio_put(bio);
 	}
+
+}
+
+STATIC void
+_xfs_buf_ioapply(
+	struct xfs_buf	*bp)
+{
+	struct blk_plug	plug;
+	int		rw;
+	int		offset;
+	int		size;
+	int		i;
+
+	if (bp->b_flags & XBF_WRITE) {
+		if (bp->b_flags & XBF_SYNCIO)
+			rw = WRITE_SYNC;
+		else
+			rw = WRITE;
+		if (bp->b_flags & XBF_FUA)
+			rw |= REQ_FUA;
+		if (bp->b_flags & XBF_FLUSH)
+			rw |= REQ_FLUSH;
+	} else if (bp->b_flags & XBF_READ_AHEAD) {
+		rw = READA;
+	} else {
+		rw = READ;
+	}
+
+	/* we only use the buffer cache for meta-data */
+	rw |= REQ_META;
+
+	/*
+	 * Walk all the vectors issuing IO on them. Set up the initial offset
+	 * into the buffer and the desired IO size before we start -
+	 * _xfs_buf_ioapply_vec() will modify them appropriately for each
+	 * subsequent call.
+	 */
+	offset = bp->b_offset;
+	size = BBTOB(bp->b_io_length);
+	blk_start_plug(&plug);
+	for (i = 0; i < bp->b_map_count; i++) {
+		xfs_buf_ioapply_map(bp, i, &offset, &size, rw);
+		if (bp->b_error)
+			break;
+		if (size <= 0)
+			break;	/* all done */
+	}
+	blk_finish_plug(&plug);
 }
 
 void
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index c9c2ba9..67d1349 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -105,6 +105,9 @@ struct xfs_buf_map {
 	int			bm_len;	/* size of I/O */
 };
 
+#define DEFINE_SINGLE_BUF_MAP(map, blkno, numblk) \
+	struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) };
+
 typedef struct xfs_buf {
 	/*
 	 * first cacheline holds all the fields needed for an uncontended cache
@@ -134,7 +137,9 @@ typedef struct xfs_buf {
 	struct xfs_trans	*b_transp;
 	struct page		**b_pages;	/* array of page pointers */
 	struct page		*b_page_array[XB_PAGES]; /* inline pages */
-	struct xfs_buf_map	b_map;		/* compound buffer map */
+	struct xfs_buf_map	*b_maps;	/* compound buffer map */
+	struct xfs_buf_map	b_map;		/* inline compound buffer map */
+	int			b_map_count;
 	int			b_io_length;	/* IO size in BBs */
 	atomic_t		b_pin_count;	/* pin count */
 	atomic_t		b_io_remaining;	/* #outstanding I/O requests */
@@ -149,11 +154,35 @@ typedef struct xfs_buf {
 
 
 /* Finding and Reading Buffers */
-struct xfs_buf *_xfs_buf_find(struct xfs_buftarg *target, xfs_daddr_t blkno,
-				size_t numblks, xfs_buf_flags_t flags,
-				struct xfs_buf *new_bp);
-#define xfs_incore(buftarg,blkno,len,lockit) \
-	_xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
+struct xfs_buf *_xfs_buf_find(struct xfs_buftarg *target,
+			      struct xfs_buf_map *map, int nmaps,
+			      xfs_buf_flags_t flags, struct xfs_buf *new_bp);
+
+static inline struct xfs_buf *
+xfs_incore(
+	struct xfs_buftarg	*target,
+	xfs_daddr_t		blkno,
+	size_t			numblks,
+	xfs_buf_flags_t		flags)
+{
+	DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
+	return _xfs_buf_find(target, &map, 1, flags, NULL);
+}
+
+struct xfs_buf *_xfs_buf_alloc(struct xfs_buftarg *target,
+			       struct xfs_buf_map *map, int nmaps,
+			       xfs_buf_flags_t flags);
+
+static inline struct xfs_buf *
+xfs_buf_alloc(
+	struct xfs_buftarg	*target,
+	xfs_daddr_t		blkno,
+	size_t			numblks,
+	xfs_buf_flags_t		flags)
+{
+	DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
+	return _xfs_buf_alloc(target, &map, 1, flags);
+}
 
 struct xfs_buf *xfs_buf_get(struct xfs_buftarg *target, xfs_daddr_t blkno,
 				size_t numblks, xfs_buf_flags_t flags);
@@ -163,8 +192,6 @@ void xfs_buf_readahead(struct xfs_buftarg *target, xfs_daddr_t blkno,
 				size_t numblks);
 
 struct xfs_buf *xfs_buf_get_empty(struct xfs_buftarg *target, size_t numblks);
-struct xfs_buf *xfs_buf_alloc(struct xfs_buftarg *target, xfs_daddr_t blkno,
-				size_t numblks, xfs_buf_flags_t flags);
 void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks);
 int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length);
 
-- 
cgit v0.10.2


From 6dde27077eaf590eac279627f74b7e4e40b864b2 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Fri, 22 Jun 2012 18:50:10 +1000
Subject: xfs: add discontiguous buffer map interface

With the internal interfaces supporting discontiguous buffer maps,
add external lookup, read and get interfaces so they can start to be
used.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 82bb812..39c5d76 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -581,22 +581,21 @@ found:
  * more hits than misses.
  */
 struct xfs_buf *
-xfs_buf_get(
-	xfs_buftarg_t		*target,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
+xfs_buf_get_map(
+	struct xfs_buftarg	*target,
+	struct xfs_buf_map	*map,
+	int			nmaps,
 	xfs_buf_flags_t		flags)
 {
 	struct xfs_buf		*bp;
 	struct xfs_buf		*new_bp;
 	int			error = 0;
-	DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
 
-	bp = _xfs_buf_find(target, &map, 1, flags, NULL);
+	bp = _xfs_buf_find(target, map, nmaps, flags, NULL);
 	if (likely(bp))
 		goto found;
 
-	new_bp = _xfs_buf_alloc(target, &map, 1, flags);
+	new_bp = _xfs_buf_alloc(target, map, nmaps, flags);
 	if (unlikely(!new_bp))
 		return NULL;
 
@@ -606,7 +605,7 @@ xfs_buf_get(
 		return NULL;
 	}
 
-	bp = _xfs_buf_find(target, &map, 1, flags, new_bp);
+	bp = _xfs_buf_find(target, map, nmaps, flags, new_bp);
 	if (!bp) {
 		xfs_buf_free(new_bp);
 		return NULL;
@@ -649,17 +648,17 @@ _xfs_buf_read(
 }
 
 xfs_buf_t *
-xfs_buf_read(
-	xfs_buftarg_t		*target,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
+xfs_buf_read_map(
+	struct xfs_buftarg	*target,
+	struct xfs_buf_map	*map,
+	int			nmaps,
 	xfs_buf_flags_t		flags)
 {
-	xfs_buf_t		*bp;
+	struct xfs_buf		*bp;
 
 	flags |= XBF_READ;
 
-	bp = xfs_buf_get(target, blkno, numblks, flags);
+	bp = xfs_buf_get_map(target, map, nmaps, flags);
 	if (bp) {
 		trace_xfs_buf_read(bp, flags, _RET_IP_);
 
@@ -687,15 +686,15 @@ xfs_buf_read(
  *	safe manner.
  */
 void
-xfs_buf_readahead(
-	xfs_buftarg_t		*target,
-	xfs_daddr_t		blkno,
-	size_t			numblks)
+xfs_buf_readahead_map(
+	struct xfs_buftarg	*target,
+	struct xfs_buf_map	*map,
+	int			nmaps)
 {
 	if (bdi_read_congested(target->bt_bdi))
 		return;
 
-	xfs_buf_read(target, blkno, numblks,
+	xfs_buf_read_map(target, map, nmaps,
 		     XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
 }
 
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 67d1349..aa96bd4 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -184,12 +184,46 @@ xfs_buf_alloc(
 	return _xfs_buf_alloc(target, &map, 1, flags);
 }
 
-struct xfs_buf *xfs_buf_get(struct xfs_buftarg *target, xfs_daddr_t blkno,
-				size_t numblks, xfs_buf_flags_t flags);
-struct xfs_buf *xfs_buf_read(struct xfs_buftarg *target, xfs_daddr_t blkno,
-				size_t numblks, xfs_buf_flags_t flags);
-void xfs_buf_readahead(struct xfs_buftarg *target, xfs_daddr_t blkno,
-				size_t numblks);
+struct xfs_buf *xfs_buf_get_map(struct xfs_buftarg *target,
+			       struct xfs_buf_map *map, int nmaps,
+			       xfs_buf_flags_t flags);
+struct xfs_buf *xfs_buf_read_map(struct xfs_buftarg *target,
+			       struct xfs_buf_map *map, int nmaps,
+			       xfs_buf_flags_t flags);
+void xfs_buf_readahead_map(struct xfs_buftarg *target,
+			       struct xfs_buf_map *map, int nmaps);
+
+static inline struct xfs_buf *
+xfs_buf_get(
+	struct xfs_buftarg	*target,
+	xfs_daddr_t		blkno,
+	size_t			numblks,
+	xfs_buf_flags_t		flags)
+{
+	DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
+	return xfs_buf_get_map(target, &map, 1, flags);
+}
+
+static inline struct xfs_buf *
+xfs_buf_read(
+	struct xfs_buftarg	*target,
+	xfs_daddr_t		blkno,
+	size_t			numblks,
+	xfs_buf_flags_t		flags)
+{
+	DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
+	return xfs_buf_read_map(target, &map, 1, flags);
+}
+
+static inline void
+xfs_buf_readahead(
+	struct xfs_buftarg	*target,
+	xfs_daddr_t		blkno,
+	size_t			numblks)
+{
+	DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
+	return xfs_buf_readahead_map(target, &map, 1);
+}
 
 struct xfs_buf *xfs_buf_get_empty(struct xfs_buftarg *target, size_t numblks);
 void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks);
-- 
cgit v0.10.2


From de2a4f59190303ff5b82ead2969968a325e61230 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Fri, 22 Jun 2012 18:50:11 +1000
Subject: xfs: add discontiguous buffer support to transactions

Now that the buffer cache supports discontiguous buffers, add
support to the transaction buffer interface for getting and reading
buffers.

Note that this patch does not convert the buffer item logging to
support discontiguous buffers. That will be done as a separate
commit.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 7c37b53..bc2afd5 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -448,11 +448,51 @@ xfs_trans_t	*xfs_trans_dup(xfs_trans_t *);
 int		xfs_trans_reserve(xfs_trans_t *, uint, uint, uint,
 				  uint, uint);
 void		xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
-struct xfs_buf	*xfs_trans_get_buf(xfs_trans_t *, struct xfs_buftarg *, xfs_daddr_t,
-				   int, uint);
-int		xfs_trans_read_buf(struct xfs_mount *, xfs_trans_t *,
-				   struct xfs_buftarg *, xfs_daddr_t, int, uint,
-				   struct xfs_buf **);
+
+struct xfs_buf	*xfs_trans_get_buf_map(struct xfs_trans *tp,
+				       struct xfs_buftarg *target,
+				       struct xfs_buf_map *map, int nmaps,
+				       uint flags);
+
+static inline struct xfs_buf *
+xfs_trans_get_buf(
+	struct xfs_trans	*tp,
+	struct xfs_buftarg	*target,
+	xfs_daddr_t		blkno,
+	int			numblks,
+	uint			flags)
+{
+	struct xfs_buf_map	map = {
+		.bm_bn = blkno,
+		.bm_len = numblks,
+	};
+	return xfs_trans_get_buf_map(tp, target, &map, 1, flags);
+}
+
+int		xfs_trans_read_buf_map(struct xfs_mount *mp,
+				       struct xfs_trans *tp,
+				       struct xfs_buftarg *target,
+				       struct xfs_buf_map *map, int nmaps,
+				       xfs_buf_flags_t flags,
+				       struct xfs_buf **bpp);
+
+static inline int
+xfs_trans_read_buf(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	struct xfs_buftarg	*target,
+	xfs_daddr_t		blkno,
+	int			numblks,
+	xfs_buf_flags_t		flags,
+	struct xfs_buf		**bpp)
+{
+	struct xfs_buf_map	map = {
+		.bm_bn = blkno,
+		.bm_len = numblks,
+	};
+	return xfs_trans_read_buf_map(mp, tp, target, &map, 1, flags, bpp);
+}
+
 struct xfs_buf	*xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int);
 
 void		xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 21c5a5e..6311b99 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -41,20 +41,26 @@ STATIC struct xfs_buf *
 xfs_trans_buf_item_match(
 	struct xfs_trans	*tp,
 	struct xfs_buftarg	*target,
-	xfs_daddr_t		blkno,
-	int			len)
+	struct xfs_buf_map	*map,
+	int			nmaps)
 {
 	struct xfs_log_item_desc *lidp;
 	struct xfs_buf_log_item	*blip;
+	int			len = 0;
+	int			i;
+
+	for (i = 0; i < nmaps; i++)
+		len += map[i].bm_len;
 
-	len = BBTOB(len);
 	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
 		blip = (struct xfs_buf_log_item *)lidp->lid_item;
 		if (blip->bli_item.li_type == XFS_LI_BUF &&
 		    blip->bli_buf->b_target == target &&
-		    XFS_BUF_ADDR(blip->bli_buf) == blkno &&
-		    BBTOB(blip->bli_buf->b_length) == len)
+		    XFS_BUF_ADDR(blip->bli_buf) == map[0].bm_bn &&
+		    blip->bli_buf->b_length == len) {
+			ASSERT(blip->bli_buf->b_map_count == nmaps);
 			return blip->bli_buf;
+		}
 	}
 
 	return NULL;
@@ -128,21 +134,19 @@ xfs_trans_bjoin(
  * If the transaction pointer is NULL, make this just a normal
  * get_buf() call.
  */
-xfs_buf_t *
-xfs_trans_get_buf(xfs_trans_t	*tp,
-		  xfs_buftarg_t	*target_dev,
-		  xfs_daddr_t	blkno,
-		  int		len,
-		  uint		flags)
+struct xfs_buf *
+xfs_trans_get_buf_map(
+	struct xfs_trans	*tp,
+	struct xfs_buftarg	*target,
+	struct xfs_buf_map	*map,
+	int			nmaps,
+	xfs_buf_flags_t		flags)
 {
 	xfs_buf_t		*bp;
 	xfs_buf_log_item_t	*bip;
 
-	/*
-	 * Default to a normal get_buf() call if the tp is NULL.
-	 */
-	if (tp == NULL)
-		return xfs_buf_get(target_dev, blkno, len, flags);
+	if (!tp)
+		return xfs_buf_get_map(target, map, nmaps, flags);
 
 	/*
 	 * If we find the buffer in the cache with this transaction
@@ -150,7 +154,7 @@ xfs_trans_get_buf(xfs_trans_t	*tp,
 	 * have it locked.  In this case we just increment the lock
 	 * recursion count and return the buffer to the caller.
 	 */
-	bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len);
+	bp = xfs_trans_buf_item_match(tp, target, map, nmaps);
 	if (bp != NULL) {
 		ASSERT(xfs_buf_islocked(bp));
 		if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) {
@@ -167,7 +171,7 @@ xfs_trans_get_buf(xfs_trans_t	*tp,
 		return (bp);
 	}
 
-	bp = xfs_buf_get(target_dev, blkno, len, flags);
+	bp = xfs_buf_get_map(target, map, nmaps, flags);
 	if (bp == NULL) {
 		return NULL;
 	}
@@ -246,26 +250,22 @@ int	xfs_error_mod = 33;
  * read_buf() call.
  */
 int
-xfs_trans_read_buf(
-	xfs_mount_t	*mp,
-	xfs_trans_t	*tp,
-	xfs_buftarg_t	*target,
-	xfs_daddr_t	blkno,
-	int		len,
-	uint		flags,
-	xfs_buf_t	**bpp)
+xfs_trans_read_buf_map(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	struct xfs_buftarg	*target,
+	struct xfs_buf_map	*map,
+	int			nmaps,
+	xfs_buf_flags_t		flags,
+	struct xfs_buf		**bpp)
 {
 	xfs_buf_t		*bp;
 	xfs_buf_log_item_t	*bip;
 	int			error;
 
 	*bpp = NULL;
-
-	/*
-	 * Default to a normal get_buf() call if the tp is NULL.
-	 */
-	if (tp == NULL) {
-		bp = xfs_buf_read(target, blkno, len, flags);
+	if (!tp) {
+		bp = xfs_buf_read_map(target, map, nmaps, flags);
 		if (!bp)
 			return (flags & XBF_TRYLOCK) ?
 					EAGAIN : XFS_ERROR(ENOMEM);
@@ -303,7 +303,7 @@ xfs_trans_read_buf(
 	 * If the buffer is not yet read in, then we read it in, increment
 	 * the lock recursion count, and return it to the caller.
 	 */
-	bp = xfs_trans_buf_item_match(tp, target, blkno, len);
+	bp = xfs_trans_buf_item_match(tp, target, map, nmaps);
 	if (bp != NULL) {
 		ASSERT(xfs_buf_islocked(bp));
 		ASSERT(bp->b_transp == tp);
@@ -349,7 +349,7 @@ xfs_trans_read_buf(
 		return 0;
 	}
 
-	bp = xfs_buf_read(target, blkno, len, flags);
+	bp = xfs_buf_read_map(target, map, nmaps, flags);
 	if (bp == NULL) {
 		*bpp = NULL;
 		return (flags & XBF_TRYLOCK) ?
-- 
cgit v0.10.2


From 372cc85ec6820c91b4eeff303880f25cb5a00ab5 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Fri, 22 Jun 2012 18:50:12 +1000
Subject: xfs: support discontiguous buffers in the xfs_buf_log_item

Log each segment of a discontiguous buffer in separate buffer format
structures. This means log recovery will recover all the changes on a
per segment basis without requiring any knowledge of the fact that it
was logged from a compound buffer.

To do this, we need to be able to determine what buffer segment any
given offset into the compound buffer sits over. This enables us to
translate the dirty bitmap into the number of separate buffer format
structures required.

We also need to be able to determine the number of bitmap elements
that a given buffer segment has, as this determines the size of the
buffer format structure. Hence we need to be able to determine both
the start offset into the buffer and the length of a given
segment to be able to calculate this.

With this information, we can preallocate, build and format the
correct log vector array for each segment in a compound buffer to
appear exactly the same as individually logged buffers in the log.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 52cd8f8..e4a6e4b 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -153,33 +153,25 @@ STATIC void	xfs_buf_do_callbacks(struct xfs_buf *bp);
  * If the XFS_BLI_STALE flag has been set, then log nothing.
  */
 STATIC uint
-xfs_buf_item_size(
-	struct xfs_log_item	*lip)
+xfs_buf_item_size_segment(
+	struct xfs_buf_log_item	*bip,
+	struct xfs_buf_log_format *blfp)
 {
-	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
 	struct xfs_buf		*bp = bip->bli_buf;
 	uint			nvecs;
 	int			next_bit;
 	int			last_bit;
 
-	ASSERT(atomic_read(&bip->bli_refcount) > 0);
-	if (bip->bli_flags & XFS_BLI_STALE) {
-		/*
-		 * The buffer is stale, so all we need to log
-		 * is the buf log format structure with the
-		 * cancel flag in it.
-		 */
-		trace_xfs_buf_item_size_stale(bip);
-		ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
-		return 1;
-	}
+	last_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
+	if (last_bit == -1)
+		return 0;
+
+	/*
+	 * initial count for a dirty buffer is 2 vectors - the format structure
+	 * and the first dirty region.
+	 */
+	nvecs = 2;
 
-	ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
-	nvecs = 1;
-	last_bit = xfs_next_bit(bip->bli_format.blf_data_map,
-					 bip->bli_format.blf_map_size, 0);
-	ASSERT(last_bit != -1);
-	nvecs++;
 	while (last_bit != -1) {
 		/*
 		 * This takes the bit number to start looking from and
@@ -187,16 +179,15 @@ xfs_buf_item_size(
 		 * if there are no more bits set or the start bit is
 		 * beyond the end of the bitmap.
 		 */
-		next_bit = xfs_next_bit(bip->bli_format.blf_data_map,
-						 bip->bli_format.blf_map_size,
-						 last_bit + 1);
+		next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
+					last_bit + 1);
 		/*
 		 * If we run out of bits, leave the loop,
 		 * else if we find a new set of bits bump the number of vecs,
 		 * else keep scanning the current set of bits.
 		 */
 		if (next_bit == -1) {
-			last_bit = -1;
+			break;
 		} else if (next_bit != last_bit + 1) {
 			last_bit = next_bit;
 			nvecs++;
@@ -210,22 +201,73 @@ xfs_buf_item_size(
 		}
 	}
 
-	trace_xfs_buf_item_size(bip);
 	return nvecs;
 }
 
 /*
- * This is called to fill in the vector of log iovecs for the
- * given log buf item.  It fills the first entry with a buf log
- * format structure, and the rest point to contiguous chunks
- * within the buffer.
+ * This returns the number of log iovecs needed to log the given buf log item.
+ *
+ * It calculates this as 1 iovec for the buf log format structure and 1 for each
+ * stretch of non-contiguous chunks to be logged.  Contiguous chunks are logged
+ * in a single iovec.
+ *
+ * Discontiguous buffers need a format structure per region that is being
+ * logged. This makes the changes in the buffer appear to log recovery as though
+ * they came from separate buffers, just like would occur if multiple buffers
+ * were used instead of a single discontiguous buffer. This enables
+ * discontiguous buffers to be in-memory constructs, completely transparent to
+ * what ends up on disk.
+ *
+ * If the XFS_BLI_STALE flag has been set, then log nothing but the buf log
+ * format structures.
  */
-STATIC void
-xfs_buf_item_format(
-	struct xfs_log_item	*lip,
-	struct xfs_log_iovec	*vecp)
+STATIC uint
+xfs_buf_item_size(
+	struct xfs_log_item	*lip)
 {
 	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
+	uint			nvecs;
+	int			i;
+
+	ASSERT(atomic_read(&bip->bli_refcount) > 0);
+	if (bip->bli_flags & XFS_BLI_STALE) {
+		/*
+		 * The buffer is stale, so all we need to log
+		 * is the buf log format structure with the
+		 * cancel flag in it.
+		 */
+		trace_xfs_buf_item_size_stale(bip);
+		ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
+		return bip->bli_format_count;
+	}
+
+	ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
+
+	/*
+	 * the vector count is based on the number of buffer vectors we have
+	 * dirty bits in. This will only be greater than one when we have a
+	 * compound buffer with more than one segment dirty. Hence for compound
+	 * buffers we need to track which segment the dirty bits correspond to,
+	 * and when we move from one segment to the next increment the vector
+	 * count for the extra buf log format structure that will need to be
+	 * written.
+	 */
+	nvecs = 0;
+	for (i = 0; i < bip->bli_format_count; i++) {
+		nvecs += xfs_buf_item_size_segment(bip, &bip->bli_formats[i]);
+	}
+
+	trace_xfs_buf_item_size(bip);
+	return nvecs;
+}
+
+static struct xfs_log_iovec *
+xfs_buf_item_format_segment(
+	struct xfs_buf_log_item	*bip,
+	struct xfs_log_iovec	*vecp,
+	uint			offset,
+	struct xfs_buf_log_format *blfp)
+{
 	struct xfs_buf	*bp = bip->bli_buf;
 	uint		base_size;
 	uint		nvecs;
@@ -235,9 +277,8 @@ xfs_buf_item_format(
 	uint		nbits;
 	uint		buffer_offset;
 
-	ASSERT(atomic_read(&bip->bli_refcount) > 0);
-	ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
-	       (bip->bli_flags & XFS_BLI_STALE));
+	/* copy the flags across from the base format item */
+	blfp->blf_flags = bip->bli_format.blf_flags;
 
 	/*
 	 * Base size is the actual size of the ondisk structure - it reflects
@@ -245,28 +286,13 @@ xfs_buf_item_format(
 	 * memory structure.
 	 */
 	base_size = offsetof(struct xfs_buf_log_format, blf_data_map) +
-			(bip->bli_format.blf_map_size *
-				sizeof(bip->bli_format.blf_data_map[0]));
-	vecp->i_addr = &bip->bli_format;
+			(blfp->blf_map_size * sizeof(blfp->blf_data_map[0]));
+	vecp->i_addr = blfp;
 	vecp->i_len = base_size;
 	vecp->i_type = XLOG_REG_TYPE_BFORMAT;
 	vecp++;
 	nvecs = 1;
 
-	/*
-	 * If it is an inode buffer, transfer the in-memory state to the
-	 * format flags and clear the in-memory state. We do not transfer
-	 * this state if the inode buffer allocation has not yet been committed
-	 * to the log as setting the XFS_BLI_INODE_BUF flag will prevent
-	 * correct replay of the inode allocation.
-	 */
-	if (bip->bli_flags & XFS_BLI_INODE_BUF) {
-		if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
-		      xfs_log_item_in_current_chkpt(lip)))
-			bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF;
-		bip->bli_flags &= ~XFS_BLI_INODE_BUF;
-	}
-
 	if (bip->bli_flags & XFS_BLI_STALE) {
 		/*
 		 * The buffer is stale, so all we need to log
@@ -274,16 +300,15 @@ xfs_buf_item_format(
 		 * cancel flag in it.
 		 */
 		trace_xfs_buf_item_format_stale(bip);
-		ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
-		bip->bli_format.blf_size = nvecs;
-		return;
+		ASSERT(blfp->blf_flags & XFS_BLF_CANCEL);
+		blfp->blf_size = nvecs;
+		return vecp;
 	}
 
 	/*
 	 * Fill in an iovec for each set of contiguous chunks.
 	 */
-	first_bit = xfs_next_bit(bip->bli_format.blf_data_map,
-					 bip->bli_format.blf_map_size, 0);
+	first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
 	ASSERT(first_bit != -1);
 	last_bit = first_bit;
 	nbits = 1;
@@ -294,9 +319,8 @@ xfs_buf_item_format(
 		 * if there are no more bits set or the start bit is
 		 * beyond the end of the bitmap.
 		 */
-		next_bit = xfs_next_bit(bip->bli_format.blf_data_map,
-						 bip->bli_format.blf_map_size,
-						 (uint)last_bit + 1);
+		next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
+					(uint)last_bit + 1);
 		/*
 		 * If we run out of bits fill in the last iovec and get
 		 * out of the loop.
@@ -307,14 +331,14 @@ xfs_buf_item_format(
 		 * keep counting and scanning.
 		 */
 		if (next_bit == -1) {
-			buffer_offset = first_bit * XFS_BLF_CHUNK;
+			buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
 			vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
 			vecp->i_len = nbits * XFS_BLF_CHUNK;
 			vecp->i_type = XLOG_REG_TYPE_BCHUNK;
 			nvecs++;
 			break;
 		} else if (next_bit != last_bit + 1) {
-			buffer_offset = first_bit * XFS_BLF_CHUNK;
+			buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
 			vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
 			vecp->i_len = nbits * XFS_BLF_CHUNK;
 			vecp->i_type = XLOG_REG_TYPE_BCHUNK;
@@ -323,14 +347,17 @@ xfs_buf_item_format(
 			first_bit = next_bit;
 			last_bit = next_bit;
 			nbits = 1;
-		} else if (xfs_buf_offset(bp, next_bit << XFS_BLF_SHIFT) !=
-			   (xfs_buf_offset(bp, last_bit << XFS_BLF_SHIFT) +
+		} else if (xfs_buf_offset(bp, offset +
+					      (next_bit << XFS_BLF_SHIFT)) !=
+			   (xfs_buf_offset(bp, offset +
+					       (last_bit << XFS_BLF_SHIFT)) +
 			    XFS_BLF_CHUNK)) {
-			buffer_offset = first_bit * XFS_BLF_CHUNK;
+			buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
 			vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
 			vecp->i_len = nbits * XFS_BLF_CHUNK;
 			vecp->i_type = XLOG_REG_TYPE_BCHUNK;
-/* You would think we need to bump the nvecs here too, but we do not
+/*
+ * You would think we need to bump the nvecs here too, but we do not
  * this number is used by recovery, and it gets confused by the boundary
  * split here
  *			nvecs++;
@@ -345,6 +372,48 @@ xfs_buf_item_format(
 		}
 	}
 	bip->bli_format.blf_size = nvecs;
+	return vecp;
+}
+
+/*
+ * This is called to fill in the vector of log iovecs for the
+ * given log buf item.  It fills the first entry with a buf log
+ * format structure, and the rest point to contiguous chunks
+ * within the buffer.
+ */
+STATIC void
+xfs_buf_item_format(
+	struct xfs_log_item	*lip,
+	struct xfs_log_iovec	*vecp)
+{
+	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
+	struct xfs_buf		*bp = bip->bli_buf;
+	uint			offset = 0;
+	int			i;
+
+	ASSERT(atomic_read(&bip->bli_refcount) > 0);
+	ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
+	       (bip->bli_flags & XFS_BLI_STALE));
+
+	/*
+	 * If it is an inode buffer, transfer the in-memory state to the
+	 * format flags and clear the in-memory state. We do not transfer
+	 * this state if the inode buffer allocation has not yet been committed
+	 * to the log as setting the XFS_BLI_INODE_BUF flag will prevent
+	 * correct replay of the inode allocation.
+	 */
+	if (bip->bli_flags & XFS_BLI_INODE_BUF) {
+		if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
+		      xfs_log_item_in_current_chkpt(lip)))
+			bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF;
+		bip->bli_flags &= ~XFS_BLI_INODE_BUF;
+	}
+
+	for (i = 0; i < bip->bli_format_count; i++) {
+		vecp = xfs_buf_item_format_segment(bip, vecp, offset,
+						&bip->bli_formats[i]);
+		offset += bp->b_maps[i].bm_len;
+	}
 
 	/*
 	 * Check to make sure everything is consistent.
@@ -620,6 +689,35 @@ static const struct xfs_item_ops xfs_buf_item_ops = {
 	.iop_committing = xfs_buf_item_committing
 };
 
+STATIC int
+xfs_buf_item_get_format(
+	struct xfs_buf_log_item	*bip,
+	int			count)
+{
+	ASSERT(bip->bli_formats == NULL);
+	bip->bli_format_count = count;
+
+	if (count == 1) {
+		bip->bli_formats = &bip->bli_format;
+		return 0;
+	}
+
+	bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format),
+				KM_SLEEP);
+	if (!bip->bli_formats)
+		return ENOMEM;
+	return 0;
+}
+
+STATIC void
+xfs_buf_item_free_format(
+	struct xfs_buf_log_item	*bip)
+{
+	if (bip->bli_formats != &bip->bli_format) {
+		kmem_free(bip->bli_formats);
+		bip->bli_formats = NULL;
+	}
+}
 
 /*
  * Allocate a new buf log item to go with the given buffer.
@@ -637,6 +735,8 @@ xfs_buf_item_init(
 	xfs_buf_log_item_t	*bip;
 	int			chunks;
 	int			map_size;
+	int			error;
+	int			i;
 
 	/*
 	 * Check to see if there is already a buf log item for
@@ -648,25 +748,33 @@ xfs_buf_item_init(
 	if (lip != NULL && lip->li_type == XFS_LI_BUF)
 		return;
 
-	/*
-	 * chunks is the number of XFS_BLF_CHUNK size pieces
-	 * the buffer can be divided into. Make sure not to
-	 * truncate any pieces.  map_size is the size of the
-	 * bitmap needed to describe the chunks of the buffer.
-	 */
-	chunks = (int)((BBTOB(bp->b_length) + (XFS_BLF_CHUNK - 1)) >>
-								XFS_BLF_SHIFT);
-	map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT);
-
-	bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone,
-						    KM_SLEEP);
+	bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP);
 	xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
 	bip->bli_buf = bp;
 	xfs_buf_hold(bp);
-	bip->bli_format.blf_type = XFS_LI_BUF;
-	bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp);
-	bip->bli_format.blf_len = (ushort)bp->b_length;
-	bip->bli_format.blf_map_size = map_size;
+
+	/*
+	 * chunks is the number of XFS_BLF_CHUNK size pieces the buffer
+	 * can be divided into. Make sure not to truncate any pieces.
+	 * map_size is the size of the bitmap needed to describe the
+	 * chunks of the buffer.
+	 *
+	 * Discontiguous buffer support follows the layout of the underlying
+	 * buffer. This makes the implementation as simple as possible.
+	 */
+	error = xfs_buf_item_get_format(bip, bp->b_map_count);
+	ASSERT(error == 0);
+
+	for (i = 0; i < bip->bli_format_count; i++) {
+		chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len),
+				      XFS_BLF_CHUNK);
+		map_size = DIV_ROUND_UP(chunks, NBWORD);
+
+		bip->bli_formats[i].blf_type = XFS_LI_BUF;
+		bip->bli_formats[i].blf_blkno = bp->b_maps[i].bm_bn;
+		bip->bli_formats[i].blf_len = bp->b_maps[i].bm_len;
+		bip->bli_formats[i].blf_map_size = map_size;
+	}
 
 #ifdef XFS_TRANS_DEBUG
 	/*
@@ -697,10 +805,11 @@ xfs_buf_item_init(
  * item's bitmap.
  */
 void
-xfs_buf_item_log(
-	xfs_buf_log_item_t	*bip,
+xfs_buf_item_log_segment(
+	struct xfs_buf_log_item	*bip,
 	uint			first,
-	uint			last)
+	uint			last,
+	uint			*map)
 {
 	uint		first_bit;
 	uint		last_bit;
@@ -713,12 +822,6 @@ xfs_buf_item_log(
 	uint		mask;
 
 	/*
-	 * Mark the item as having some dirty data for
-	 * quick reference in xfs_buf_item_dirty.
-	 */
-	bip->bli_flags |= XFS_BLI_DIRTY;
-
-	/*
 	 * Convert byte offsets to bit numbers.
 	 */
 	first_bit = first >> XFS_BLF_SHIFT;
@@ -734,7 +837,7 @@ xfs_buf_item_log(
 	 * to set a bit in.
 	 */
 	word_num = first_bit >> BIT_TO_WORD_SHIFT;
-	wordp = &(bip->bli_format.blf_data_map[word_num]);
+	wordp = &map[word_num];
 
 	/*
 	 * Calculate the starting bit in the first word.
@@ -781,6 +884,51 @@ xfs_buf_item_log(
 	xfs_buf_item_log_debug(bip, first, last);
 }
 
+/*
+ * Mark bytes first through last inclusive as dirty in the buf item's
+ * bitmaps.  first/last are byte offsets into the whole buffer, while each
+ * segment's bitmap starts at bit zero, so the range is clipped to every
+ * segment it touches and passed down segment relative.
+ */
+void
+xfs_buf_item_log(
+	xfs_buf_log_item_t	*bip,
+	uint			first,
+	uint			last)
+{
+	int			i;
+	uint			start;
+	uint			end;
+	struct xfs_buf		*bp = bip->bli_buf;
+
+	/*
+	 * Mark the item as having some dirty data for
+	 * quick reference in xfs_buf_item_dirty.
+	 */
+	bip->bli_flags |= XFS_BLI_DIRTY;
+
+	/* start is the byte offset of segment i, end is its last byte */
+	start = 0;
+	for (i = 0; i < bip->bli_format_count; i++) {
+		if (start > last)
+			break;
+		end = start + BBTOB(bp->b_maps[i].bm_len) - 1;
+		if (first > end) {
+			/* the range to log begins in a later segment */
+			start += BBTOB(bp->b_maps[i].bm_len);
+			continue;
+		}
+		if (first < start)
+			first = start;
+		if (end > last)
+			end = last;
+
+		xfs_buf_item_log_segment(bip, first - start, end - start,
+					 &bip->bli_formats[i].blf_data_map[0]);
+		start += BBTOB(bp->b_maps[i].bm_len);
+	}
+}
+
 
 /*
  * Return 1 if the buffer has some data that has been logged (at any
@@ -802,6 +950,7 @@ xfs_buf_item_free(
 	kmem_free(bip->bli_logged);
 #endif /* XFS_TRANS_DEBUG */
 
+	xfs_buf_item_free_format(bip);
 	kmem_zone_free(xfs_buf_item_zone, bip);
 }
 
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index ff26867..6850f49 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -102,6 +102,8 @@ typedef struct xfs_buf_log_item {
 	char			*bli_orig;	/* original buffer copy */
 	char			*bli_logged;	/* bytes logged (bitmap) */
 #endif
+	int			bli_format_count;	/* count of headers */
+	struct xfs_buf_log_format *bli_formats;	/* array of in-log header ptrs */
 	struct xfs_buf_log_format bli_format;	/* embedded in-log header */
 } xfs_buf_log_item_t;
 
-- 
cgit v0.10.2


From 3605431fb9739a30ccd0c6380ae8e3c6f8e670a5 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Fri, 22 Jun 2012 18:50:13 +1000
Subject: xfs: use discontiguous xfs_buf support in dabuf wrappers

First step in converting the directory code to use native
discontiguous buffers and replacing the dabuf construct.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 015b946..76e5dba 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -85,7 +85,7 @@ STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
  */
 STATIC uint	xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
 STATIC int	xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
-STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps);
+STATIC xfs_dabuf_t *xfs_da_buf_make(xfs_buf_t *bp);
 STATIC int	xfs_da_blk_unlink(xfs_da_state_t *state,
 				  xfs_da_state_blk_t *drop_blk,
 				  xfs_da_state_blk_t *save_blk);
@@ -1967,35 +1967,75 @@ xfs_da_map_covers_blocks(
 }
 
 /*
- * Make a dabuf.
- * Used for get_buf, read_buf, read_bufr, and reada_buf.
+ * Convert a struct xfs_bmbt_irec to a struct xfs_buf_map.
+ *
+ * For the single map case, it is assumed that the caller has provided a pointer
+ * to a valid xfs_buf_map.  For the multiple map case, this function will
+ * allocate the xfs_buf_map to hold all the maps and replace the caller's single
+ * map pointer with the allocated map.
  */
-STATIC int
-xfs_da_do_buf(
-	xfs_trans_t	*trans,
-	xfs_inode_t	*dp,
-	xfs_dablk_t	bno,
-	xfs_daddr_t	*mappedbnop,
-	xfs_dabuf_t	**bpp,
-	int		whichfork,
-	int		caller)
+static int
+xfs_buf_map_from_irec(
+	struct xfs_mount	*mp,
+	struct xfs_buf_map	**mapp,
+	unsigned int		*nmaps,
+	struct xfs_bmbt_irec	*irecs,
+	unsigned int		nirecs)
 {
-	xfs_buf_t	*bp = NULL;
-	xfs_buf_t	**bplist;
-	int		error=0;
-	int		i;
-	xfs_bmbt_irec_t	map;
-	xfs_bmbt_irec_t	*mapp;
-	xfs_daddr_t	mappedbno;
-	xfs_mount_t	*mp;
-	int		nbplist=0;
-	int		nfsb;
-	int		nmap;
-	xfs_dabuf_t	*rbp;
+	struct xfs_buf_map	*map;
+	int			i;
+
+	ASSERT(*nmaps == 1);
+	ASSERT(nirecs >= 1);
+
+	if (nirecs > 1) {
+		map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_SLEEP);
+		if (!map)
+			return ENOMEM;
+		*mapp = map;
+	}
+
+	*nmaps = nirecs;
+	map = *mapp;
+	for (i = 0; i < *nmaps; i++) {
+		ASSERT(irecs[i].br_startblock != DELAYSTARTBLOCK &&
+		       irecs[i].br_startblock != HOLESTARTBLOCK);
+		map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock);
+		map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount);
+	}
+	return 0;
+}
+
+/*
+ * Map the block we are given ready for reading. There are three possible return
+ * values:
+ *	-1 - will be returned if we land in a hole and mappedbno == -2 so the
+ *	     caller knows not to execute a subsequent read.
+ *	 0 - if we mapped the block successfully
+ *	>0 - positive error number if there was an error.
+ */
+static int
+xfs_dabuf_map(
+	struct xfs_trans	*trans,
+	struct xfs_inode	*dp,
+	xfs_dablk_t		bno,
+	xfs_daddr_t		mappedbno,
+	int			whichfork,
+	struct xfs_buf_map	**map,
+	int			*nmaps)
+{
+	struct xfs_mount	*mp = dp->i_mount;
+	int			nfsb;
+	int			error = 0;
+	struct xfs_bmbt_irec	irec;
+	struct xfs_bmbt_irec	*irecs = &irec;
+	int			nirecs;
+
+	ASSERT(map && *map);
+	ASSERT(*nmaps == 1);
 
-	mp = dp->i_mount;
 	nfsb = (whichfork == XFS_DATA_FORK) ? mp->m_dirblkfsbs : 1;
-	mappedbno = *mappedbnop;
+
 	/*
 	 * Caller doesn't have a mapping.  -2 means don't complain
 	 * if we land in a hole.
@@ -2004,112 +2044,152 @@ xfs_da_do_buf(
 		/*
 		 * Optimize the one-block case.
 		 */
-		if (nfsb == 1)
-			mapp = &map;
-		else
-			mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP);
+		if (nfsb != 1)
+			irecs = kmem_zalloc(sizeof(irec) * nfsb, KM_SLEEP);
 
-		nmap = nfsb;
-		error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, mapp,
-				       &nmap, xfs_bmapi_aflag(whichfork));
+		nirecs = nfsb;
+		error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs,
+				       &nirecs, xfs_bmapi_aflag(whichfork));
 		if (error)
-			goto exit0;
+			goto out;
 	} else {
-		map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
-		map.br_startoff = (xfs_fileoff_t)bno;
-		map.br_blockcount = nfsb;
-		mapp = &map;
-		nmap = 1;
+		irecs->br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
+		irecs->br_startoff = (xfs_fileoff_t)bno;
+		irecs->br_blockcount = nfsb;
+		irecs->br_state = 0;
+		nirecs = 1;
 	}
-	if (!xfs_da_map_covers_blocks(nmap, mapp, bno, nfsb)) {
-		error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
+
+	if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) {
+		error = mappedbno == -2 ? -1 : XFS_ERROR(EFSCORRUPTED);
 		if (unlikely(error == EFSCORRUPTED)) {
 			if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
+				int i;
 				xfs_alert(mp, "%s: bno %lld dir: inode %lld",
 					__func__, (long long)bno,
 					(long long)dp->i_ino);
-				for (i = 0; i < nmap; i++) {
+				for (i = 0; i < *nmaps; i++) {
 					xfs_alert(mp,
 "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
 						i,
-						(long long)mapp[i].br_startoff,
-						(long long)mapp[i].br_startblock,
-						(long long)mapp[i].br_blockcount,
-						mapp[i].br_state);
+						(long long)irecs[i].br_startoff,
+						(long long)irecs[i].br_startblock,
+						(long long)irecs[i].br_blockcount,
+						irecs[i].br_state);
 				}
 			}
 			XFS_ERROR_REPORT("xfs_da_do_buf(1)",
 					 XFS_ERRLEVEL_LOW, mp);
 		}
-		goto exit0;
+		goto out;
 	}
-	if (caller != 3 && nmap > 1) {
-		bplist = kmem_alloc(sizeof(*bplist) * nmap, KM_SLEEP);
-		nbplist = 0;
-	} else
-		bplist = NULL;
-	/*
-	 * Turn the mapping(s) into buffer(s).
-	 */
-	for (i = 0; i < nmap; i++) {
-		int	nmapped;
-
-		mappedbno = XFS_FSB_TO_DADDR(mp, mapp[i].br_startblock);
-		if (i == 0)
-			*mappedbnop = mappedbno;
-		nmapped = (int)XFS_FSB_TO_BB(mp, mapp[i].br_blockcount);
-		switch (caller) {
-		case 0:
-			bp = xfs_trans_get_buf(trans, mp->m_ddev_targp,
-				mappedbno, nmapped, 0);
-			error = bp ? bp->b_error : XFS_ERROR(EIO);
-			break;
-		case 1:
-		case 2:
-			bp = NULL;
-			error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp,
-				mappedbno, nmapped, 0, &bp);
-			break;
-		case 3:
-			xfs_buf_readahead(mp->m_ddev_targp, mappedbno, nmapped);
+	error = xfs_buf_map_from_irec(mp, map, nmaps, irecs, nirecs);
+out:
+	if (irecs != &irec)
+		kmem_free(irecs);
+	return error;
+}
+
+/*
+ * Get a buffer for the dir/attr block.  *bpp is left NULL, and 0 returned,
+ * when xfs_dabuf_map() reports a hole it was asked to tolerate.
+ */
+int
+xfs_da_get_buf(
+	struct xfs_trans	*trans,
+	struct xfs_inode	*dp,
+	xfs_dablk_t		bno,
+	xfs_daddr_t		mappedbno,
+	xfs_dabuf_t		**bpp,
+	int			whichfork)
+{
+	struct xfs_buf		*bp;
+	struct xfs_buf_map	map;
+	struct xfs_buf_map	*mapp;
+	int			nmap;
+	int			error;
+
+	*bpp = NULL;
+	mapp = &map;
+	nmap = 1;
+	error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork,
+				&mapp, &nmap);
+	if (error) {
+		/* mapping a hole is not an error, but we don't continue */
+		if (error == -1)
 			error = 0;
-			bp = NULL;
-			break;
-		}
-		if (error) {
-			if (bp)
-				xfs_trans_brelse(trans, bp);
-			goto exit1;
-		}
-		if (!bp)
-			continue;
-		if (caller == 1) {
-			if (whichfork == XFS_ATTR_FORK)
-				xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
-			else
-				xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
-		}
-		if (bplist) {
-			bplist[nbplist++] = bp;
-		}
+		goto out_free;
 	}
-	/*
-	 * Build a dabuf structure.
-	 */
-	if (bplist) {
-		rbp = xfs_da_buf_make(nbplist, bplist);
-	} else if (bp)
-		rbp = xfs_da_buf_make(1, &bp);
+
+	bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp,
+				    mapp, nmap, 0);
+	error = bp ? bp->b_error : XFS_ERROR(EIO);
+	if (error) {
+		if (bp)
+			xfs_trans_brelse(trans, bp);
+		goto out_free;
+	}
+	*bpp = xfs_da_buf_make(bp);
+
+out_free:
+	if (mapp != &map)
+		kmem_free(mapp);
+	return error;
+}
+
+/*
+ * Get a buffer for the dir/attr block, fill in the contents.
+ */
+int
+xfs_da_read_buf(
+	struct xfs_trans	*trans,
+	struct xfs_inode	*dp,
+	xfs_dablk_t		bno,
+	xfs_daddr_t		mappedbno,
+	xfs_dabuf_t		**bpp,
+	int			whichfork)
+{
+	struct xfs_buf		*bp;
+	struct xfs_buf_map	map;
+	struct xfs_buf_map	*mapp;
+	int			nmap;
+	int			error;
+
+	*bpp = NULL;
+	mapp = &map;
+	nmap = 1;
+	error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork,
+				&mapp, &nmap);
+	if (error) {
+		/* mapping a hole is not an error, but we don't continue */
+		if (error == -1)
+			error = 0;
+		goto out_free;
+	}
+
+	error = xfs_trans_read_buf_map(dp->i_mount, trans,
+					dp->i_mount->m_ddev_targp,
+					mapp, nmap, 0, &bp);
+	if (error)
+		goto out_free;
+
+	if (whichfork == XFS_ATTR_FORK)
+		xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
 	else
-		rbp = NULL;
+		xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
+
+	*bpp = xfs_da_buf_make(bp);
+
 	/*
-	 * For read_buf, check the magic number.
+	 * This verification code will be moved to a CRC verification callback
+	 * function so just leave it here unchanged until then.
 	 */
-	if (caller == 1) {
-		xfs_dir2_data_hdr_t	*hdr = rbp->data;
-		xfs_dir2_free_t		*free = rbp->data;
-		xfs_da_blkinfo_t	*info = rbp->data;
+	{
+		xfs_dir2_data_hdr_t	*hdr = (*bpp)->data;
+		xfs_dir2_free_t		*free = (*bpp)->data;
+		xfs_da_blkinfo_t	*info = (*bpp)->data;
 		uint			magic, magic1;
+		struct xfs_mount	*mp = dp->i_mount;
 
 		magic = be16_to_cpu(info->magic);
 		magic1 = be32_to_cpu(hdr->magic);
@@ -2123,66 +2203,20 @@ xfs_da_do_buf(
 				   (free->hdr.magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)),
 				mp, XFS_ERRTAG_DA_READ_BUF,
 				XFS_RANDOM_DA_READ_BUF))) {
-			trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_);
+			trace_xfs_da_btree_corrupt(bp, _RET_IP_);
 			XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
 					     XFS_ERRLEVEL_LOW, mp, info);
 			error = XFS_ERROR(EFSCORRUPTED);
-			xfs_da_brelse(trans, rbp);
-			nbplist = 0;
-			goto exit1;
+			xfs_da_brelse(trans, *bpp);
+			goto out_free;
 		}
 	}
-	if (bplist) {
-		kmem_free(bplist);
-	}
-	if (mapp != &map) {
-		kmem_free(mapp);
-	}
-	if (bpp)
-		*bpp = rbp;
-	return 0;
-exit1:
-	if (bplist) {
-		for (i = 0; i < nbplist; i++)
-			xfs_trans_brelse(trans, bplist[i]);
-		kmem_free(bplist);
-	}
-exit0:
+
+out_free:
 	if (mapp != &map)
 		kmem_free(mapp);
-	if (bpp)
-		*bpp = NULL;
-	return error;
-}
-
-/*
- * Get a buffer for the dir/attr block.
- */
-int
-xfs_da_get_buf(
-	xfs_trans_t	*trans,
-	xfs_inode_t	*dp,
-	xfs_dablk_t	bno,
-	xfs_daddr_t		mappedbno,
-	xfs_dabuf_t	**bpp,
-	int		whichfork)
-{
-	return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0);
-}
 
-/*
- * Get a buffer for the dir/attr block, fill in the contents.
- */
-int
-xfs_da_read_buf(
-	xfs_trans_t	*trans,
-	xfs_inode_t	*dp,
-	xfs_dablk_t	bno,
-	xfs_daddr_t		mappedbno,
-	xfs_dabuf_t	**bpp,
-	int		whichfork)
-{
-	return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1);
+	return error;
 }
 
 /*
@@ -2190,18 +2224,38 @@ xfs_da_read_buf(
  */
 xfs_daddr_t
 xfs_da_reada_buf(
-	xfs_trans_t	*trans,
-	xfs_inode_t	*dp,
-	xfs_dablk_t	bno,
-	int		whichfork)
+	struct xfs_trans	*trans,
+	struct xfs_inode	*dp,
+	xfs_dablk_t		bno,
+	int			whichfork)
 {
-	xfs_daddr_t		rval;
+	xfs_daddr_t		mappedbno = -1;
+	struct xfs_buf_map	map;
+	struct xfs_buf_map	*mapp;
+	int			nmap;
+	int			error;
+
+	mapp = &map;
+	nmap = 1;
+	error = xfs_dabuf_map(trans, dp, bno, -1, whichfork,
+				&mapp, &nmap);
+	if (error) {
+		/* mapping a hole is not an error, but we don't continue */
+		if (error == -1)
+			error = 0;
+		goto out_free;
+	}
 
-	rval = -1;
-	if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3))
+	mappedbno = mapp[0].bm_bn;
+	xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap);
+
+out_free:
+	if (mapp != &map)
+		kmem_free(mapp);
+
+	if (error)
 		return -1;
-	else
-		return rval;
+	return mappedbno;
 }
 
 kmem_zone_t *xfs_da_state_zone;	/* anchor for state struct zone */
@@ -2261,78 +2315,25 @@ xfs_da_state_free(xfs_da_state_t *state)
  */
 /* ARGSUSED */
 STATIC xfs_dabuf_t *
-xfs_da_buf_make(int nbuf, xfs_buf_t **bps)
+xfs_da_buf_make(xfs_buf_t *bp)
 {
-	xfs_buf_t	*bp;
 	xfs_dabuf_t	*dabuf;
-	int		i;
-	int		off;
 
-	if (nbuf == 1)
-		dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS);
-	else
-		dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
-	dabuf->dirty = 0;
-	if (nbuf == 1) {
-		dabuf->nbuf = 1;
-		bp = bps[0];
-		dabuf->bbcount = bp->b_length;
-		dabuf->data = bp->b_addr;
-		dabuf->bps[0] = bp;
-	} else {
-		dabuf->nbuf = nbuf;
-		for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) {
-			dabuf->bps[i] = bp = bps[i];
-			dabuf->bbcount += bp->b_length;
-		}
-		dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
-		for (i = off = 0; i < nbuf; i++, off += BBTOB(bp->b_length)) {
-			bp = bps[i];
-			memcpy((char *)dabuf->data + off, bp->b_addr,
-				BBTOB(bp->b_length));
-		}
-	}
+	dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS);
+	dabuf->bbcount = bp->b_length;
+	dabuf->data = bp->b_addr;
+	dabuf->bp = bp;
 	return dabuf;
 }
 
 /*
- * Un-dirty a dabuf.
- */
-STATIC void
-xfs_da_buf_clean(xfs_dabuf_t *dabuf)
-{
-	xfs_buf_t	*bp;
-	int		i;
-	int		off;
-
-	if (dabuf->dirty) {
-		ASSERT(dabuf->nbuf > 1);
-		dabuf->dirty = 0;
-		for (i = off = 0; i < dabuf->nbuf;
-				i++, off += BBTOB(bp->b_length)) {
-			bp = dabuf->bps[i];
-			memcpy(bp->b_addr, dabuf->data + off,
-						BBTOB(bp->b_length));
-		}
-	}
-}
-
-/*
  * Release a dabuf.
  */
 void
 xfs_da_buf_done(xfs_dabuf_t *dabuf)
 {
-	ASSERT(dabuf);
-	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
-	if (dabuf->dirty)
-		xfs_da_buf_clean(dabuf);
-	if (dabuf->nbuf > 1) {
-		kmem_free(dabuf->data);
-		kmem_free(dabuf);
-	} else {
-		kmem_zone_free(xfs_dabuf_zone, dabuf);
-	}
+	ASSERT(dabuf->data && dabuf->bbcount && dabuf->bp);
+	kmem_zone_free(xfs_dabuf_zone, dabuf);
 }
 
 /*
@@ -2341,41 +2342,9 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
 void
 xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
 {
-	xfs_buf_t	*bp;
-	uint		f;
-	int		i;
-	uint		l;
-	int		off;
-
-	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
-	if (dabuf->nbuf == 1) {
-		ASSERT(dabuf->data == dabuf->bps[0]->b_addr);
-		xfs_trans_log_buf(tp, dabuf->bps[0], first, last);
-		return;
-	}
-	dabuf->dirty = 1;
-	ASSERT(first <= last);
-	for (i = off = 0; i < dabuf->nbuf; i++, off += BBTOB(bp->b_length)) {
-		bp = dabuf->bps[i];
-		f = off;
-		l = f + BBTOB(bp->b_length) - 1;
-		if (f < first)
-			f = first;
-		if (l > last)
-			l = last;
-		if (f <= l)
-			xfs_trans_log_buf(tp, bp, f - off, l - off);
-		/*
-		 * B_DONE is set by xfs_trans_log buf.
-		 * If we don't set it on a new buffer (get not read)
-		 * then if we don't put anything in the buffer it won't
-		 * be set, and at commit it it released into the cache,
-		 * and then a read will fail.
-		 */
-		else if (!(XFS_BUF_ISDONE(bp)))
-		  XFS_BUF_DONE(bp);
-	}
-	ASSERT(last < off);
+	ASSERT(dabuf->data && dabuf->bbcount && dabuf->bp);
+	ASSERT(dabuf->data == dabuf->bp->b_addr);
+	xfs_trans_log_buf(tp, dabuf->bp, first, last);
 }
 
 /*
@@ -2386,24 +2355,9 @@ xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
 void
 xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
 {
-	xfs_buf_t	*bp;
-	xfs_buf_t	**bplist;
-	int		i;
-	int		nbuf;
-
-	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
-	if ((nbuf = dabuf->nbuf) == 1) {
-		bplist = &bp;
-		bp = dabuf->bps[0];
-	} else {
-		bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
-		memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
-	}
+	ASSERT(dabuf->data && dabuf->bbcount && dabuf->bp);
+	xfs_trans_brelse(tp, dabuf->bp);
 	xfs_da_buf_done(dabuf);
-	for (i = 0; i < nbuf; i++)
-		xfs_trans_brelse(tp, bplist[i]);
-	if (bplist != &bp)
-		kmem_free(bplist);
 }
 
 /*
@@ -2412,24 +2366,9 @@ xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
 void
 xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
 {
-	xfs_buf_t	*bp;
-	xfs_buf_t	**bplist;
-	int		i;
-	int		nbuf;
-
-	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
-	if ((nbuf = dabuf->nbuf) == 1) {
-		bplist = &bp;
-		bp = dabuf->bps[0];
-	} else {
-		bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
-		memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
-	}
+	ASSERT(dabuf->data && dabuf->bbcount && dabuf->bp);
 	xfs_da_buf_done(dabuf);
-	for (i = 0; i < nbuf; i++)
-		xfs_trans_binval(tp, bplist[i]);
-	if (bplist != &bp)
-		kmem_free(bplist);
+	xfs_trans_binval(tp, dabuf->bp);
 }
 
 /*
@@ -2438,7 +2377,6 @@ xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
 xfs_daddr_t
 xfs_da_blkno(xfs_dabuf_t *dabuf)
 {
-	ASSERT(dabuf->nbuf);
 	ASSERT(dabuf->data);
-	return XFS_BUF_ADDR(dabuf->bps[0]);
+	return XFS_BUF_ADDR(dabuf->bp);
 }
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index dbf7c07..0b64c4a 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -141,14 +141,10 @@ typedef struct xfs_da_args {
  * same place as the b_addr field for the buffer, else to kmem_alloced memory.
  */
 typedef struct xfs_dabuf {
-	int		nbuf;		/* number of buffer pointers present */
-	short		dirty;		/* data needs to be copied back */
 	short		bbcount;	/* how large is data in bbs */
 	void		*data;		/* pointer for buffers' data */
-	struct xfs_buf	*bps[1];	/* actually nbuf of these */
+	struct xfs_buf	*bp;		/* actually nbuf of these */
 } xfs_dabuf_t;
-#define	XFS_DA_BUF_SIZE(n)	\
-	(sizeof(xfs_dabuf_t) + sizeof(struct xfs_buf *) * ((n) - 1))
 
 /*
  * Storage for holding state during Btree searches and split/join ops.
-- 
cgit v0.10.2


From 1d9025e56143c0c4aebebdb62e46618d3d284218 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Fri, 22 Jun 2012 18:50:14 +1000
Subject: xfs: remove struct xfs_dabuf and infrastructure

The struct xfs_dabuf now only tracks a single xfs_buf, and all the
information it holds can be obtained directly from the xfs_buf. Hence
we can remove struct xfs_dabuf and pass the xfs_buf around
everywhere.

Kill struct xfs_dabuf and the associated infrastructure.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index a17ff01..0ca1f0b 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -893,7 +893,7 @@ STATIC int
 xfs_attr_leaf_addname(xfs_da_args_t *args)
 {
 	xfs_inode_t *dp;
-	xfs_dabuf_t *bp;
+	struct xfs_buf *bp;
 	int retval, error, committed, forkoff;
 
 	trace_xfs_attr_leaf_addname(args);
@@ -915,11 +915,11 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 	 */
 	retval = xfs_attr_leaf_lookup_int(bp, args);
 	if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
-		xfs_da_brelse(args->trans, bp);
+		xfs_trans_brelse(args->trans, bp);
 		return(retval);
 	} else if (retval == EEXIST) {
 		if (args->flags & ATTR_CREATE) {	/* pure create op */
-			xfs_da_brelse(args->trans, bp);
+			xfs_trans_brelse(args->trans, bp);
 			return(retval);
 		}
 
@@ -937,7 +937,6 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 	 * if required.
 	 */
 	retval = xfs_attr_leaf_add(bp, args);
-	xfs_da_buf_done(bp);
 	if (retval == ENOSPC) {
 		/*
 		 * Promote the attribute list to the Btree format, then
@@ -1065,8 +1064,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 			 */
 			if (committed)
 				xfs_trans_ijoin(args->trans, dp, 0);
-		} else
-			xfs_da_buf_done(bp);
+		}
 
 		/*
 		 * Commit the remove and start the next trans in series.
@@ -1092,7 +1090,7 @@ STATIC int
 xfs_attr_leaf_removename(xfs_da_args_t *args)
 {
 	xfs_inode_t *dp;
-	xfs_dabuf_t *bp;
+	struct xfs_buf *bp;
 	int error, committed, forkoff;
 
 	trace_xfs_attr_leaf_removename(args);
@@ -1111,7 +1109,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
 	ASSERT(bp != NULL);
 	error = xfs_attr_leaf_lookup_int(bp, args);
 	if (error == ENOATTR) {
-		xfs_da_brelse(args->trans, bp);
+		xfs_trans_brelse(args->trans, bp);
 		return(error);
 	}
 
@@ -1141,8 +1139,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
 		 */
 		if (committed)
 			xfs_trans_ijoin(args->trans, dp, 0);
-	} else
-		xfs_da_buf_done(bp);
+	}
 	return(0);
 }
 
@@ -1155,7 +1152,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
 STATIC int
 xfs_attr_leaf_get(xfs_da_args_t *args)
 {
-	xfs_dabuf_t *bp;
+	struct xfs_buf *bp;
 	int error;
 
 	args->blkno = 0;
@@ -1167,11 +1164,11 @@ xfs_attr_leaf_get(xfs_da_args_t *args)
 
 	error = xfs_attr_leaf_lookup_int(bp, args);
 	if (error != EEXIST)  {
-		xfs_da_brelse(args->trans, bp);
+		xfs_trans_brelse(args->trans, bp);
 		return(error);
 	}
 	error = xfs_attr_leaf_getvalue(bp, args);
-	xfs_da_brelse(args->trans, bp);
+	xfs_trans_brelse(args->trans, bp);
 	if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
 		error = xfs_attr_rmtval_get(args);
 	}
@@ -1186,23 +1183,23 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
 {
 	xfs_attr_leafblock_t *leaf;
 	int error;
-	xfs_dabuf_t *bp;
+	struct xfs_buf *bp;
 
 	context->cursor->blkno = 0;
 	error = xfs_da_read_buf(NULL, context->dp, 0, -1, &bp, XFS_ATTR_FORK);
 	if (error)
 		return XFS_ERROR(error);
 	ASSERT(bp != NULL);
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	if (unlikely(leaf->hdr.info.magic != cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) {
 		XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW,
 				     context->dp->i_mount, leaf);
-		xfs_da_brelse(NULL, bp);
+		xfs_trans_brelse(NULL, bp);
 		return XFS_ERROR(EFSCORRUPTED);
 	}
 
 	error = xfs_attr_leaf_list_int(bp, context);
-	xfs_da_brelse(NULL, bp);
+	xfs_trans_brelse(NULL, bp);
 	return XFS_ERROR(error);
 }
 
@@ -1489,7 +1486,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
 	xfs_da_state_t *state;
 	xfs_da_state_blk_t *blk;
 	xfs_inode_t *dp;
-	xfs_dabuf_t *bp;
+	struct xfs_buf *bp;
 	int retval, error, committed, forkoff;
 
 	trace_xfs_attr_node_removename(args);
@@ -1601,14 +1598,13 @@ xfs_attr_node_removename(xfs_da_args_t *args)
 		 */
 		ASSERT(state->path.active == 1);
 		ASSERT(state->path.blk[0].bp);
-		xfs_da_buf_done(state->path.blk[0].bp);
 		state->path.blk[0].bp = NULL;
 
 		error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
 						     XFS_ATTR_FORK);
 		if (error)
 			goto out;
-		ASSERT((((xfs_attr_leafblock_t *)bp->data)->hdr.info.magic) ==
+		ASSERT((((xfs_attr_leafblock_t *)bp->b_addr)->hdr.info.magic) ==
 		       cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 
 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
@@ -1635,7 +1631,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
 			if (committed)
 				xfs_trans_ijoin(args->trans, dp, 0);
 		} else
-			xfs_da_brelse(args->trans, bp);
+			xfs_trans_brelse(args->trans, bp);
 	}
 	error = 0;
 
@@ -1665,8 +1661,7 @@ xfs_attr_fillstate(xfs_da_state_t *state)
 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
 		if (blk->bp) {
-			blk->disk_blkno = xfs_da_blkno(blk->bp);
-			xfs_da_buf_done(blk->bp);
+			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
 			blk->bp = NULL;
 		} else {
 			blk->disk_blkno = 0;
@@ -1681,8 +1676,7 @@ xfs_attr_fillstate(xfs_da_state_t *state)
 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
 		if (blk->bp) {
-			blk->disk_blkno = xfs_da_blkno(blk->bp);
-			xfs_da_buf_done(blk->bp);
+			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
 			blk->bp = NULL;
 		} else {
 			blk->disk_blkno = 0;
@@ -1792,7 +1786,7 @@ xfs_attr_node_get(xfs_da_args_t *args)
 	 * If not in a transaction, we have to release all the buffers.
 	 */
 	for (i = 0; i < state->path.active; i++) {
-		xfs_da_brelse(args->trans, state->path.blk[i].bp);
+		xfs_trans_brelse(args->trans, state->path.blk[i].bp);
 		state->path.blk[i].bp = NULL;
 	}
 
@@ -1808,7 +1802,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 	xfs_da_intnode_t *node;
 	xfs_da_node_entry_t *btree;
 	int error, i;
-	xfs_dabuf_t *bp;
+	struct xfs_buf *bp;
 
 	cursor = context->cursor;
 	cursor->initted = 1;
@@ -1825,30 +1819,30 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 		if ((error != 0) && (error != EFSCORRUPTED))
 			return(error);
 		if (bp) {
-			node = bp->data;
+			node = bp->b_addr;
 			switch (be16_to_cpu(node->hdr.info.magic)) {
 			case XFS_DA_NODE_MAGIC:
 				trace_xfs_attr_list_wrong_blk(context);
-				xfs_da_brelse(NULL, bp);
+				xfs_trans_brelse(NULL, bp);
 				bp = NULL;
 				break;
 			case XFS_ATTR_LEAF_MAGIC:
-				leaf = bp->data;
+				leaf = bp->b_addr;
 				if (cursor->hashval > be32_to_cpu(leaf->entries[
 				    be16_to_cpu(leaf->hdr.count)-1].hashval)) {
 					trace_xfs_attr_list_wrong_blk(context);
-					xfs_da_brelse(NULL, bp);
+					xfs_trans_brelse(NULL, bp);
 					bp = NULL;
 				} else if (cursor->hashval <=
 					     be32_to_cpu(leaf->entries[0].hashval)) {
 					trace_xfs_attr_list_wrong_blk(context);
-					xfs_da_brelse(NULL, bp);
+					xfs_trans_brelse(NULL, bp);
 					bp = NULL;
 				}
 				break;
 			default:
 				trace_xfs_attr_list_wrong_blk(context);
-				xfs_da_brelse(NULL, bp);
+				xfs_trans_brelse(NULL, bp);
 				bp = NULL;
 			}
 		}
@@ -1873,7 +1867,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 						 context->dp->i_mount);
 				return(XFS_ERROR(EFSCORRUPTED));
 			}
-			node = bp->data;
+			node = bp->b_addr;
 			if (node->hdr.info.magic ==
 			    cpu_to_be16(XFS_ATTR_LEAF_MAGIC))
 				break;
@@ -1883,7 +1877,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 						     XFS_ERRLEVEL_LOW,
 						     context->dp->i_mount,
 						     node);
-				xfs_da_brelse(NULL, bp);
+				xfs_trans_brelse(NULL, bp);
 				return(XFS_ERROR(EFSCORRUPTED));
 			}
 			btree = node->btree;
@@ -1898,10 +1892,10 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 				}
 			}
 			if (i == be16_to_cpu(node->hdr.count)) {
-				xfs_da_brelse(NULL, bp);
+				xfs_trans_brelse(NULL, bp);
 				return(0);
 			}
-			xfs_da_brelse(NULL, bp);
+			xfs_trans_brelse(NULL, bp);
 		}
 	}
 	ASSERT(bp != NULL);
@@ -1912,24 +1906,24 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 	 * adding the information.
 	 */
 	for (;;) {
-		leaf = bp->data;
+		leaf = bp->b_addr;
 		if (unlikely(leaf->hdr.info.magic !=
 			     cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) {
 			XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)",
 					     XFS_ERRLEVEL_LOW,
 					     context->dp->i_mount, leaf);
-			xfs_da_brelse(NULL, bp);
+			xfs_trans_brelse(NULL, bp);
 			return(XFS_ERROR(EFSCORRUPTED));
 		}
 		error = xfs_attr_leaf_list_int(bp, context);
 		if (error) {
-			xfs_da_brelse(NULL, bp);
+			xfs_trans_brelse(NULL, bp);
 			return error;
 		}
 		if (context->seen_enough || leaf->hdr.info.forw == 0)
 			break;
 		cursor->blkno = be32_to_cpu(leaf->hdr.info.forw);
-		xfs_da_brelse(NULL, bp);
+		xfs_trans_brelse(NULL, bp);
 		error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1,
 					      &bp, XFS_ATTR_FORK);
 		if (error)
@@ -1941,7 +1935,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 			return(XFS_ERROR(EFSCORRUPTED));
 		}
 	}
-	xfs_da_brelse(NULL, bp);
+	xfs_trans_brelse(NULL, bp);
 	return(0);
 }
 
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 7d89d80..d330111 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -54,10 +54,10 @@
  * Routines used for growing the Btree.
  */
 STATIC int xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t which_block,
-				    xfs_dabuf_t **bpp);
-STATIC int xfs_attr_leaf_add_work(xfs_dabuf_t *leaf_buffer, xfs_da_args_t *args,
-					      int freemap_index);
-STATIC void xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *leaf_buffer);
+				struct xfs_buf **bpp);
+STATIC int xfs_attr_leaf_add_work(struct xfs_buf *leaf_buffer,
+				  xfs_da_args_t *args, int freemap_index);
+STATIC void xfs_attr_leaf_compact(xfs_trans_t *tp, struct xfs_buf *leaf_buffer);
 STATIC void xfs_attr_leaf_rebalance(xfs_da_state_t *state,
 						   xfs_da_state_blk_t *blk1,
 						   xfs_da_state_blk_t *blk2);
@@ -71,9 +71,9 @@ STATIC int xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
  * Routines used for shrinking the Btree.
  */
 STATIC int xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp,
-				  xfs_dabuf_t *bp, int level);
+				  struct xfs_buf *bp, int level);
 STATIC int xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp,
-				  xfs_dabuf_t *bp);
+				  struct xfs_buf *bp);
 STATIC int xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
 				   xfs_dablk_t blkno, int blkcnt);
 
@@ -480,7 +480,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
 	char *tmpbuffer;
 	int error, i, size;
 	xfs_dablk_t blkno;
-	xfs_dabuf_t *bp;
+	struct xfs_buf *bp;
 	xfs_ifork_t *ifp;
 
 	trace_xfs_attr_sf_to_leaf(args);
@@ -550,8 +550,6 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
 	error = 0;
 
 out:
-	if(bp)
-		xfs_da_buf_done(bp);
 	kmem_free(tmpbuffer);
 	return(error);
 }
@@ -737,14 +735,16 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
  * a shortform attribute list.
  */
 int
-xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
+xfs_attr_shortform_allfit(
+	struct xfs_buf	*bp,
+	struct xfs_inode *dp)
 {
 	xfs_attr_leafblock_t *leaf;
 	xfs_attr_leaf_entry_t *entry;
 	xfs_attr_leaf_name_local_t *name_loc;
 	int bytes, i;
 
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 
 	entry = &leaf->entries[0];
@@ -774,7 +774,10 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
  * Convert a leaf attribute list to shortform attribute list
  */
 int
-xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
+xfs_attr_leaf_to_shortform(
+	struct xfs_buf	*bp,
+	xfs_da_args_t	*args,
+	int		forkoff)
 {
 	xfs_attr_leafblock_t *leaf;
 	xfs_attr_leaf_entry_t *entry;
@@ -791,10 +794,10 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
 	ASSERT(tmpbuffer != NULL);
 
 	ASSERT(bp != NULL);
-	memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
+	memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(dp->i_mount));
 	leaf = (xfs_attr_leafblock_t *)tmpbuffer;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
-	memset(bp->data, 0, XFS_LBSIZE(dp->i_mount));
+	memset(bp->b_addr, 0, XFS_LBSIZE(dp->i_mount));
 
 	/*
 	 * Clean out the prior contents of the attribute list.
@@ -855,7 +858,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
 	xfs_attr_leafblock_t *leaf;
 	xfs_da_intnode_t *node;
 	xfs_inode_t *dp;
-	xfs_dabuf_t *bp1, *bp2;
+	struct xfs_buf *bp1, *bp2;
 	xfs_dablk_t blkno;
 	int error;
 
@@ -877,10 +880,9 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
 	if (error)
 		goto out;
 	ASSERT(bp2 != NULL);
-	memcpy(bp2->data, bp1->data, XFS_LBSIZE(dp->i_mount));
-	xfs_da_buf_done(bp1);
+	memcpy(bp2->b_addr, bp1->b_addr, XFS_LBSIZE(dp->i_mount));
 	bp1 = NULL;
-	xfs_da_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1);
+	xfs_trans_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1);
 
 	/*
 	 * Set up the new root node.
@@ -888,21 +890,17 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
 	error = xfs_da_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK);
 	if (error)
 		goto out;
-	node = bp1->data;
-	leaf = bp2->data;
+	node = bp1->b_addr;
+	leaf = bp2->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 	/* both on-disk, don't endian-flip twice */
 	node->btree[0].hashval =
 		leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval;
 	node->btree[0].before = cpu_to_be32(blkno);
 	node->hdr.count = cpu_to_be16(1);
-	xfs_da_log_buf(args->trans, bp1, 0, XFS_LBSIZE(dp->i_mount) - 1);
+	xfs_trans_log_buf(args->trans, bp1, 0, XFS_LBSIZE(dp->i_mount) - 1);
 	error = 0;
 out:
-	if (bp1)
-		xfs_da_buf_done(bp1);
-	if (bp2)
-		xfs_da_buf_done(bp2);
 	return(error);
 }
 
@@ -916,12 +914,15 @@ out:
  * or a leaf in a node attribute list.
  */
 STATIC int
-xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
+xfs_attr_leaf_create(
+	xfs_da_args_t	*args,
+	xfs_dablk_t	blkno,
+	struct xfs_buf	**bpp)
 {
 	xfs_attr_leafblock_t *leaf;
 	xfs_attr_leaf_hdr_t *hdr;
 	xfs_inode_t *dp;
-	xfs_dabuf_t *bp;
+	struct xfs_buf *bp;
 	int error;
 
 	trace_xfs_attr_leaf_create(args);
@@ -933,7 +934,7 @@ xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
 	if (error)
 		return(error);
 	ASSERT(bp != NULL);
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount));
 	hdr = &leaf->hdr;
 	hdr->info.magic = cpu_to_be16(XFS_ATTR_LEAF_MAGIC);
@@ -947,7 +948,7 @@ xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
 	hdr->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr->firstused) -
 					   sizeof(xfs_attr_leaf_hdr_t));
 
-	xfs_da_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1);
+	xfs_trans_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1);
 
 	*bpp = bp;
 	return(0);
@@ -1014,7 +1015,9 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
  * Add a name to the leaf attribute list structure.
  */
 int
-xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
+xfs_attr_leaf_add(
+	struct xfs_buf		*bp,
+	struct xfs_da_args	*args)
 {
 	xfs_attr_leafblock_t *leaf;
 	xfs_attr_leaf_hdr_t *hdr;
@@ -1023,7 +1026,7 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
 
 	trace_xfs_attr_leaf_add(args);
 
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 	ASSERT((args->index >= 0)
 		&& (args->index <= be16_to_cpu(leaf->hdr.count)));
@@ -1085,7 +1088,10 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
  * Add a name to a leaf attribute list structure.
  */
 STATIC int
-xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
+xfs_attr_leaf_add_work(
+	struct xfs_buf	*bp,
+	xfs_da_args_t	*args,
+	int		mapindex)
 {
 	xfs_attr_leafblock_t *leaf;
 	xfs_attr_leaf_hdr_t *hdr;
@@ -1096,7 +1102,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 	xfs_mount_t *mp;
 	int tmp, i;
 
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 	hdr = &leaf->hdr;
 	ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE));
@@ -1110,7 +1116,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 		tmp  = be16_to_cpu(hdr->count) - args->index;
 		tmp *= sizeof(xfs_attr_leaf_entry_t);
 		memmove((char *)(entry+1), (char *)entry, tmp);
-		xfs_da_log_buf(args->trans, bp,
+		xfs_trans_log_buf(args->trans, bp,
 		    XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
 	}
 	be16_add_cpu(&hdr->count, 1);
@@ -1142,7 +1148,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 			args->index2++;
 		}
 	}
-	xfs_da_log_buf(args->trans, bp,
+	xfs_trans_log_buf(args->trans, bp,
 			  XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
 	ASSERT((args->index == 0) ||
 	       (be32_to_cpu(entry->hashval) >= be32_to_cpu((entry-1)->hashval)));
@@ -1174,7 +1180,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 		args->rmtblkno = 1;
 		args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen);
 	}
-	xfs_da_log_buf(args->trans, bp,
+	xfs_trans_log_buf(args->trans, bp,
 	     XFS_DA_LOGRANGE(leaf, xfs_attr_leaf_name(leaf, args->index),
 				   xfs_attr_leaf_entsize(leaf, args->index)));
 
@@ -1198,7 +1204,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 		}
 	}
 	be16_add_cpu(&hdr->usedbytes, xfs_attr_leaf_entsize(leaf, args->index));
-	xfs_da_log_buf(args->trans, bp,
+	xfs_trans_log_buf(args->trans, bp,
 		XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
 	return(0);
 }
@@ -1207,7 +1213,9 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
  * Garbage collect a leaf attribute list block by copying it to a new buffer.
  */
 STATIC void
-xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
+xfs_attr_leaf_compact(
+	struct xfs_trans *trans,
+	struct xfs_buf	*bp)
 {
 	xfs_attr_leafblock_t *leaf_s, *leaf_d;
 	xfs_attr_leaf_hdr_t *hdr_s, *hdr_d;
@@ -1217,14 +1225,14 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
 	mp = trans->t_mountp;
 	tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP);
 	ASSERT(tmpbuffer != NULL);
-	memcpy(tmpbuffer, bp->data, XFS_LBSIZE(mp));
-	memset(bp->data, 0, XFS_LBSIZE(mp));
+	memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp));
+	memset(bp->b_addr, 0, XFS_LBSIZE(mp));
 
 	/*
 	 * Copy basic information
 	 */
 	leaf_s = (xfs_attr_leafblock_t *)tmpbuffer;
-	leaf_d = bp->data;
+	leaf_d = bp->b_addr;
 	hdr_s = &leaf_s->hdr;
 	hdr_d = &leaf_d->hdr;
 	hdr_d->info = hdr_s->info;	/* struct copy */
@@ -1247,7 +1255,7 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
 	 */
 	xfs_attr_leaf_moveents(leaf_s, 0, leaf_d, 0,
 				be16_to_cpu(hdr_s->count), mp);
-	xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
+	xfs_trans_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
 
 	kmem_free(tmpbuffer);
 }
@@ -1279,8 +1287,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	 */
 	ASSERT(blk1->magic == XFS_ATTR_LEAF_MAGIC);
 	ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC);
-	leaf1 = blk1->bp->data;
-	leaf2 = blk2->bp->data;
+	leaf1 = blk1->bp->b_addr;
+	leaf2 = blk2->bp->b_addr;
 	ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 	ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 	args = state->args;
@@ -1298,8 +1306,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		tmp_blk = blk1;
 		blk1 = blk2;
 		blk2 = tmp_blk;
-		leaf1 = blk1->bp->data;
-		leaf2 = blk2->bp->data;
+		leaf1 = blk1->bp->b_addr;
+		leaf2 = blk2->bp->b_addr;
 		swap = 1;
 	}
 	hdr1 = &leaf1->hdr;
@@ -1346,8 +1354,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		xfs_attr_leaf_moveents(leaf1, be16_to_cpu(hdr1->count) - count,
 				leaf2, 0, count, state->mp);
 
-		xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
-		xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
+		xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
+		xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
 	} else if (count > be16_to_cpu(hdr1->count)) {
 		/*
 		 * I assert that since all callers pass in an empty
@@ -1378,8 +1386,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		xfs_attr_leaf_moveents(leaf2, 0, leaf1,
 				be16_to_cpu(hdr1->count), count, state->mp);
 
-		xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
-		xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
+		xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
+		xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
 	}
 
 	/*
@@ -1448,8 +1456,8 @@ xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
 	/*
 	 * Set up environment.
 	 */
-	leaf1 = blk1->bp->data;
-	leaf2 = blk2->bp->data;
+	leaf1 = blk1->bp->b_addr;
+	leaf2 = blk2->bp->b_addr;
 	hdr1 = &leaf1->hdr;
 	hdr2 = &leaf2->hdr;
 	foundit = 0;
@@ -1551,7 +1559,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
 	xfs_da_blkinfo_t *info;
 	int count, bytes, forward, error, retval, i;
 	xfs_dablk_t blkno;
-	xfs_dabuf_t *bp;
+	struct xfs_buf *bp;
 
 	/*
 	 * Check for the degenerate case of the block being over 50% full.
@@ -1559,7 +1567,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
 	 * to coalesce with a sibling.
 	 */
 	blk = &state->path.blk[ state->path.active-1 ];
-	info = blk->bp->data;
+	info = blk->bp->b_addr;
 	ASSERT(info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 	leaf = (xfs_attr_leafblock_t *)info;
 	count = be16_to_cpu(leaf->hdr.count);
@@ -1622,13 +1630,13 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
 		count  = be16_to_cpu(leaf->hdr.count);
 		bytes  = state->blocksize - (state->blocksize>>2);
 		bytes -= be16_to_cpu(leaf->hdr.usedbytes);
-		leaf = bp->data;
+		leaf = bp->b_addr;
 		ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 		count += be16_to_cpu(leaf->hdr.count);
 		bytes -= be16_to_cpu(leaf->hdr.usedbytes);
 		bytes -= count * sizeof(xfs_attr_leaf_entry_t);
 		bytes -= sizeof(xfs_attr_leaf_hdr_t);
-		xfs_da_brelse(state->args->trans, bp);
+		xfs_trans_brelse(state->args->trans, bp);
 		if (bytes >= 0)
 			break;	/* fits with at least 25% to spare */
 	}
@@ -1666,7 +1674,9 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
  * If two leaves are 37% full, when combined they will leave 25% free.
  */
 int
-xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
+xfs_attr_leaf_remove(
+	struct xfs_buf	*bp,
+	xfs_da_args_t	*args)
 {
 	xfs_attr_leafblock_t *leaf;
 	xfs_attr_leaf_hdr_t *hdr;
@@ -1676,7 +1686,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	int tablesize, tmp, i;
 	xfs_mount_t *mp;
 
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 	hdr = &leaf->hdr;
 	mp = args->trans->t_mountp;
@@ -1769,7 +1779,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	 */
 	memset(xfs_attr_leaf_name(leaf, args->index), 0, entsize);
 	be16_add_cpu(&hdr->usedbytes, -entsize);
-	xfs_da_log_buf(args->trans, bp,
+	xfs_trans_log_buf(args->trans, bp,
 	     XFS_DA_LOGRANGE(leaf, xfs_attr_leaf_name(leaf, args->index),
 				   entsize));
 
@@ -1777,7 +1787,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 					* sizeof(xfs_attr_leaf_entry_t);
 	memmove((char *)entry, (char *)(entry+1), tmp);
 	be16_add_cpu(&hdr->count, -1);
-	xfs_da_log_buf(args->trans, bp,
+	xfs_trans_log_buf(args->trans, bp,
 	    XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
 	entry = &leaf->entries[be16_to_cpu(hdr->count)];
 	memset((char *)entry, 0, sizeof(xfs_attr_leaf_entry_t));
@@ -1807,7 +1817,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	} else {
 		hdr->holes = 1;		/* mark as needing compaction */
 	}
-	xfs_da_log_buf(args->trans, bp,
+	xfs_trans_log_buf(args->trans, bp,
 			  XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
 
 	/*
@@ -1840,8 +1850,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	mp = state->mp;
 	ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC);
 	ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC);
-	drop_leaf = drop_blk->bp->data;
-	save_leaf = save_blk->bp->data;
+	drop_leaf = drop_blk->bp->b_addr;
+	save_leaf = save_blk->bp->b_addr;
 	ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 	ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 	drop_hdr = &drop_leaf->hdr;
@@ -1906,7 +1916,7 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 		kmem_free(tmpbuffer);
 	}
 
-	xfs_da_log_buf(state->args->trans, save_blk->bp, 0,
+	xfs_trans_log_buf(state->args->trans, save_blk->bp, 0,
 					   state->blocksize - 1);
 
 	/*
@@ -1934,7 +1944,9 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
  * Don't change the args->value unless we find the attribute.
  */
 int
-xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
+xfs_attr_leaf_lookup_int(
+	struct xfs_buf	*bp,
+	xfs_da_args_t	*args)
 {
 	xfs_attr_leafblock_t *leaf;
 	xfs_attr_leaf_entry_t *entry;
@@ -1945,7 +1957,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
 
 	trace_xfs_attr_leaf_lookup(args);
 
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 	ASSERT(be16_to_cpu(leaf->hdr.count)
 					< (XFS_LBSIZE(args->dp->i_mount)/8));
@@ -2041,7 +2053,9 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
  * list structure.
  */
 int
-xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args)
+xfs_attr_leaf_getvalue(
+	struct xfs_buf	*bp,
+	xfs_da_args_t	*args)
 {
 	int valuelen;
 	xfs_attr_leafblock_t *leaf;
@@ -2049,7 +2063,7 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	xfs_attr_leaf_name_local_t *name_loc;
 	xfs_attr_leaf_name_remote_t *name_rmt;
 
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 	ASSERT(be16_to_cpu(leaf->hdr.count)
 					< (XFS_LBSIZE(args->dp->i_mount)/8));
@@ -2247,12 +2261,14 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
  * Return 0 unless leaf2 should go before leaf1.
  */
 int
-xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
+xfs_attr_leaf_order(
+	struct xfs_buf	*leaf1_bp,
+	struct xfs_buf	*leaf2_bp)
 {
 	xfs_attr_leafblock_t *leaf1, *leaf2;
 
-	leaf1 = leaf1_bp->data;
-	leaf2 = leaf2_bp->data;
+	leaf1 = leaf1_bp->b_addr;
+	leaf2 = leaf2_bp->b_addr;
 	ASSERT((leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) &&
 	       (leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)));
 	if ((be16_to_cpu(leaf1->hdr.count) > 0) &&
@@ -2272,11 +2288,13 @@ xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
  * Pick up the last hashvalue from a leaf block.
  */
 xfs_dahash_t
-xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count)
+xfs_attr_leaf_lasthash(
+	struct xfs_buf	*bp,
+	int		*count)
 {
 	xfs_attr_leafblock_t *leaf;
 
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 	if (count)
 		*count = be16_to_cpu(leaf->hdr.count);
@@ -2337,7 +2355,9 @@ xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, int *local)
  * Copy out attribute list entries for attr_list(), for leaf attribute lists.
  */
 int
-xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
+xfs_attr_leaf_list_int(
+	struct xfs_buf		*bp,
+	xfs_attr_list_context_t	*context)
 {
 	attrlist_cursor_kern_t *cursor;
 	xfs_attr_leafblock_t *leaf;
@@ -2345,7 +2365,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
 	int retval, i;
 
 	ASSERT(bp != NULL);
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	cursor = context->cursor;
 	cursor->initted = 1;
 
@@ -2463,7 +2483,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
 	xfs_attr_leafblock_t *leaf;
 	xfs_attr_leaf_entry_t *entry;
 	xfs_attr_leaf_name_remote_t *name_rmt;
-	xfs_dabuf_t *bp;
+	struct xfs_buf *bp;
 	int error;
 #ifdef DEBUG
 	xfs_attr_leaf_name_local_t *name_loc;
@@ -2482,7 +2502,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
 	}
 	ASSERT(bp != NULL);
 
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 	ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
 	ASSERT(args->index >= 0);
@@ -2505,7 +2525,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
 #endif /* DEBUG */
 
 	entry->flags &= ~XFS_ATTR_INCOMPLETE;
-	xfs_da_log_buf(args->trans, bp,
+	xfs_trans_log_buf(args->trans, bp,
 			 XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
 
 	if (args->rmtblkno) {
@@ -2513,10 +2533,9 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
 		name_rmt = xfs_attr_leaf_name_remote(leaf, args->index);
 		name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
 		name_rmt->valuelen = cpu_to_be32(args->valuelen);
-		xfs_da_log_buf(args->trans, bp,
+		xfs_trans_log_buf(args->trans, bp,
 			 XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt)));
 	}
-	xfs_da_buf_done(bp);
 
 	/*
 	 * Commit the flag value change and start the next trans in series.
@@ -2533,7 +2552,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
 	xfs_attr_leafblock_t *leaf;
 	xfs_attr_leaf_entry_t *entry;
 	xfs_attr_leaf_name_remote_t *name_rmt;
-	xfs_dabuf_t *bp;
+	struct xfs_buf *bp;
 	int error;
 
 	trace_xfs_attr_leaf_setflag(args);
@@ -2548,7 +2567,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
 	}
 	ASSERT(bp != NULL);
 
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 	ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
 	ASSERT(args->index >= 0);
@@ -2556,16 +2575,15 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
 
 	ASSERT((entry->flags & XFS_ATTR_INCOMPLETE) == 0);
 	entry->flags |= XFS_ATTR_INCOMPLETE;
-	xfs_da_log_buf(args->trans, bp,
+	xfs_trans_log_buf(args->trans, bp,
 			XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
 	if ((entry->flags & XFS_ATTR_LOCAL) == 0) {
 		name_rmt = xfs_attr_leaf_name_remote(leaf, args->index);
 		name_rmt->valueblk = 0;
 		name_rmt->valuelen = 0;
-		xfs_da_log_buf(args->trans, bp,
+		xfs_trans_log_buf(args->trans, bp,
 			 XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt)));
 	}
-	xfs_da_buf_done(bp);
 
 	/*
 	 * Commit the flag value change and start the next trans in series.
@@ -2586,7 +2604,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
 	xfs_attr_leafblock_t *leaf1, *leaf2;
 	xfs_attr_leaf_entry_t *entry1, *entry2;
 	xfs_attr_leaf_name_remote_t *name_rmt;
-	xfs_dabuf_t *bp1, *bp2;
+	struct xfs_buf *bp1, *bp2;
 	int error;
 #ifdef DEBUG
 	xfs_attr_leaf_name_local_t *name_loc;
@@ -2620,13 +2638,13 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
 		bp2 = bp1;
 	}
 
-	leaf1 = bp1->data;
+	leaf1 = bp1->b_addr;
 	ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 	ASSERT(args->index < be16_to_cpu(leaf1->hdr.count));
 	ASSERT(args->index >= 0);
 	entry1 = &leaf1->entries[ args->index ];
 
-	leaf2 = bp2->data;
+	leaf2 = bp2->b_addr;
 	ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 	ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count));
 	ASSERT(args->index2 >= 0);
@@ -2660,30 +2678,27 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
 	ASSERT((entry2->flags & XFS_ATTR_INCOMPLETE) == 0);
 
 	entry1->flags &= ~XFS_ATTR_INCOMPLETE;
-	xfs_da_log_buf(args->trans, bp1,
+	xfs_trans_log_buf(args->trans, bp1,
 			  XFS_DA_LOGRANGE(leaf1, entry1, sizeof(*entry1)));
 	if (args->rmtblkno) {
 		ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0);
 		name_rmt = xfs_attr_leaf_name_remote(leaf1, args->index);
 		name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
 		name_rmt->valuelen = cpu_to_be32(args->valuelen);
-		xfs_da_log_buf(args->trans, bp1,
+		xfs_trans_log_buf(args->trans, bp1,
 			 XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt)));
 	}
 
 	entry2->flags |= XFS_ATTR_INCOMPLETE;
-	xfs_da_log_buf(args->trans, bp2,
+	xfs_trans_log_buf(args->trans, bp2,
 			  XFS_DA_LOGRANGE(leaf2, entry2, sizeof(*entry2)));
 	if ((entry2->flags & XFS_ATTR_LOCAL) == 0) {
 		name_rmt = xfs_attr_leaf_name_remote(leaf2, args->index2);
 		name_rmt->valueblk = 0;
 		name_rmt->valuelen = 0;
-		xfs_da_log_buf(args->trans, bp2,
+		xfs_trans_log_buf(args->trans, bp2,
 			 XFS_DA_LOGRANGE(leaf2, name_rmt, sizeof(*name_rmt)));
 	}
-	xfs_da_buf_done(bp1);
-	if (bp1 != bp2)
-		xfs_da_buf_done(bp2);
 
 	/*
 	 * Commit the flag value change and start the next trans in series.
@@ -2706,7 +2721,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
 {
 	xfs_da_blkinfo_t *info;
 	xfs_daddr_t blkno;
-	xfs_dabuf_t *bp;
+	struct xfs_buf *bp;
 	int error;
 
 	/*
@@ -2718,20 +2733,20 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
 	error = xfs_da_read_buf(*trans, dp, 0, -1, &bp, XFS_ATTR_FORK);
 	if (error)
 		return(error);
-	blkno = xfs_da_blkno(bp);
+	blkno = XFS_BUF_ADDR(bp);
 
 	/*
 	 * Invalidate the tree, even if the "tree" is only a single leaf block.
 	 * This is a depth-first traversal!
 	 */
-	info = bp->data;
+	info = bp->b_addr;
 	if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
 		error = xfs_attr_node_inactive(trans, dp, bp, 1);
 	} else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) {
 		error = xfs_attr_leaf_inactive(trans, dp, bp);
 	} else {
 		error = XFS_ERROR(EIO);
-		xfs_da_brelse(*trans, bp);
+		xfs_trans_brelse(*trans, bp);
 	}
 	if (error)
 		return(error);
@@ -2742,7 +2757,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
 	error = xfs_da_get_buf(*trans, dp, 0, blkno, &bp, XFS_ATTR_FORK);
 	if (error)
 		return(error);
-	xfs_da_binval(*trans, bp);	/* remove from cache */
+	xfs_trans_binval(*trans, bp);	/* remove from cache */
 	/*
 	 * Commit the invalidate and start the next transaction.
 	 */
@@ -2756,34 +2771,37 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
  * We're doing a depth-first traversal in order to invalidate everything.
  */
 STATIC int
-xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
-				   int level)
+xfs_attr_node_inactive(
+	struct xfs_trans **trans,
+	struct xfs_inode *dp,
+	struct xfs_buf	*bp,
+	int		level)
 {
 	xfs_da_blkinfo_t *info;
 	xfs_da_intnode_t *node;
 	xfs_dablk_t child_fsb;
 	xfs_daddr_t parent_blkno, child_blkno;
 	int error, count, i;
-	xfs_dabuf_t *child_bp;
+	struct xfs_buf *child_bp;
 
 	/*
 	 * Since this code is recursive (gasp!) we must protect ourselves.
 	 */
 	if (level > XFS_DA_NODE_MAXDEPTH) {
-		xfs_da_brelse(*trans, bp);	/* no locks for later trans */
+		xfs_trans_brelse(*trans, bp);	/* no locks for later trans */
 		return(XFS_ERROR(EIO));
 	}
 
-	node = bp->data;
+	node = bp->b_addr;
 	ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
-	parent_blkno = xfs_da_blkno(bp);	/* save for re-read later */
+	parent_blkno = XFS_BUF_ADDR(bp);	/* save for re-read later */
 	count = be16_to_cpu(node->hdr.count);
 	if (!count) {
-		xfs_da_brelse(*trans, bp);
+		xfs_trans_brelse(*trans, bp);
 		return(0);
 	}
 	child_fsb = be32_to_cpu(node->btree[0].before);
-	xfs_da_brelse(*trans, bp);	/* no locks for later trans */
+	xfs_trans_brelse(*trans, bp);	/* no locks for later trans */
 
 	/*
 	 * If this is the node level just above the leaves, simply loop
@@ -2803,12 +2821,12 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
 			return(error);
 		if (child_bp) {
 						/* save for re-read later */
-			child_blkno = xfs_da_blkno(child_bp);
+			child_blkno = XFS_BUF_ADDR(child_bp);
 
 			/*
 			 * Invalidate the subtree, however we have to.
 			 */
-			info = child_bp->data;
+			info = child_bp->b_addr;
 			if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
 				error = xfs_attr_node_inactive(trans, dp,
 						child_bp, level+1);
@@ -2817,7 +2835,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
 						child_bp);
 			} else {
 				error = XFS_ERROR(EIO);
-				xfs_da_brelse(*trans, child_bp);
+				xfs_trans_brelse(*trans, child_bp);
 			}
 			if (error)
 				return(error);
@@ -2830,7 +2848,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
 				&child_bp, XFS_ATTR_FORK);
 			if (error)
 				return(error);
-			xfs_da_binval(*trans, child_bp);
+			xfs_trans_binval(*trans, child_bp);
 		}
 
 		/*
@@ -2843,7 +2861,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
 			if (error)
 				return(error);
 			child_fsb = be32_to_cpu(node->btree[i+1].before);
-			xfs_da_brelse(*trans, bp);
+			xfs_trans_brelse(*trans, bp);
 		}
 		/*
 		 * Atomically commit the whole invalidate stuff.
@@ -2863,7 +2881,10 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
  * caught holding something that the logging code wants to flush to disk.
  */
 STATIC int
-xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
+xfs_attr_leaf_inactive(
+	struct xfs_trans **trans,
+	struct xfs_inode *dp,
+	struct xfs_buf	*bp)
 {
 	xfs_attr_leafblock_t *leaf;
 	xfs_attr_leaf_entry_t *entry;
@@ -2871,7 +2892,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
 	xfs_attr_inactive_list_t *list, *lp;
 	int error, count, size, tmp, i;
 
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 
 	/*
@@ -2892,7 +2913,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
 	 * If there are no "remote" values, we're done.
 	 */
 	if (count == 0) {
-		xfs_da_brelse(*trans, bp);
+		xfs_trans_brelse(*trans, bp);
 		return(0);
 	}
 
@@ -2919,7 +2940,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
 			}
 		}
 	}
-	xfs_da_brelse(*trans, bp);	/* unlock for trans. in freextent() */
+	xfs_trans_brelse(*trans, bp);	/* unlock for trans. in freextent() */
 
 	/*
 	 * Invalidate each of the "remote" value extents.
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 9c7d22f..dea1772 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -31,7 +31,6 @@
 struct attrlist;
 struct attrlist_cursor_kern;
 struct xfs_attr_list_context;
-struct xfs_dabuf;
 struct xfs_da_args;
 struct xfs_da_state;
 struct xfs_da_state_blk;
@@ -215,7 +214,7 @@ int	xfs_attr_shortform_getvalue(struct xfs_da_args *args);
 int	xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
 int	xfs_attr_shortform_remove(struct xfs_da_args *args);
 int	xfs_attr_shortform_list(struct xfs_attr_list_context *context);
-int	xfs_attr_shortform_allfit(struct xfs_dabuf *bp, struct xfs_inode *dp);
+int	xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
 int	xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
 
 
@@ -223,7 +222,7 @@ int	xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
  * Internal routines when attribute fork size == XFS_LBSIZE(mp).
  */
 int	xfs_attr_leaf_to_node(struct xfs_da_args *args);
-int	xfs_attr_leaf_to_shortform(struct xfs_dabuf *bp,
+int	xfs_attr_leaf_to_shortform(struct xfs_buf *bp,
 				   struct xfs_da_args *args, int forkoff);
 int	xfs_attr_leaf_clearflag(struct xfs_da_args *args);
 int	xfs_attr_leaf_setflag(struct xfs_da_args *args);
@@ -235,14 +234,14 @@ int	xfs_attr_leaf_flipflags(xfs_da_args_t *args);
 int	xfs_attr_leaf_split(struct xfs_da_state *state,
 				   struct xfs_da_state_blk *oldblk,
 				   struct xfs_da_state_blk *newblk);
-int	xfs_attr_leaf_lookup_int(struct xfs_dabuf *leaf,
+int	xfs_attr_leaf_lookup_int(struct xfs_buf *leaf,
 					struct xfs_da_args *args);
-int	xfs_attr_leaf_getvalue(struct xfs_dabuf *bp, struct xfs_da_args *args);
-int	xfs_attr_leaf_add(struct xfs_dabuf *leaf_buffer,
+int	xfs_attr_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args);
+int	xfs_attr_leaf_add(struct xfs_buf *leaf_buffer,
 				 struct xfs_da_args *args);
-int	xfs_attr_leaf_remove(struct xfs_dabuf *leaf_buffer,
+int	xfs_attr_leaf_remove(struct xfs_buf *leaf_buffer,
 				    struct xfs_da_args *args);
-int	xfs_attr_leaf_list_int(struct xfs_dabuf *bp,
+int	xfs_attr_leaf_list_int(struct xfs_buf *bp,
 				      struct xfs_attr_list_context *context);
 
 /*
@@ -257,9 +256,9 @@ int	xfs_attr_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp);
 /*
  * Utility routines.
  */
-xfs_dahash_t	xfs_attr_leaf_lasthash(struct xfs_dabuf *bp, int *count);
-int	xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp,
-				   struct xfs_dabuf *leaf2_bp);
+xfs_dahash_t	xfs_attr_leaf_lasthash(struct xfs_buf *bp, int *count);
+int	xfs_attr_leaf_order(struct xfs_buf *leaf1_bp,
+				   struct xfs_buf *leaf2_bp);
 int	xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize,
 					int *local);
 #endif	/* __XFS_ATTR_LEAF_H__ */
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 76e5dba..7bfb7dd 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -83,9 +83,9 @@ STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
 /*
  * Utility routines.
  */
-STATIC uint	xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
-STATIC int	xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
-STATIC xfs_dabuf_t *xfs_da_buf_make(xfs_buf_t *bp);
+STATIC uint	xfs_da_node_lasthash(struct xfs_buf *bp, int *count);
+STATIC int	xfs_da_node_order(struct xfs_buf *node1_bp,
+				  struct xfs_buf *node2_bp);
 STATIC int	xfs_da_blk_unlink(xfs_da_state_t *state,
 				  xfs_da_state_blk_t *drop_blk,
 				  xfs_da_state_blk_t *save_blk);
@@ -100,10 +100,10 @@ STATIC void	xfs_da_state_kill_altpath(xfs_da_state_t *state);
  */
 int
 xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
-				 xfs_dabuf_t **bpp, int whichfork)
+				 struct xfs_buf **bpp, int whichfork)
 {
 	xfs_da_intnode_t *node;
-	xfs_dabuf_t *bp;
+	struct xfs_buf *bp;
 	int error;
 	xfs_trans_t *tp;
 
@@ -114,7 +114,7 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
 	if (error)
 		return(error);
 	ASSERT(bp != NULL);
-	node = bp->data;
+	node = bp->b_addr;
 	node->hdr.info.forw = 0;
 	node->hdr.info.back = 0;
 	node->hdr.info.magic = cpu_to_be16(XFS_DA_NODE_MAGIC);
@@ -122,7 +122,7 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
 	node->hdr.count = 0;
 	node->hdr.level = cpu_to_be16(level);
 
-	xfs_da_log_buf(tp, bp,
+	xfs_trans_log_buf(tp, bp,
 		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
 
 	*bpp = bp;
@@ -138,7 +138,7 @@ xfs_da_split(xfs_da_state_t *state)
 {
 	xfs_da_state_blk_t *oldblk, *newblk, *addblk;
 	xfs_da_intnode_t *node;
-	xfs_dabuf_t *bp;
+	struct xfs_buf *bp;
 	int max, action, error, i;
 
 	trace_xfs_da_split(state->args);
@@ -203,7 +203,6 @@ xfs_da_split(xfs_da_state_t *state)
 		case XFS_DA_NODE_MAGIC:
 			error = xfs_da_node_split(state, oldblk, newblk, addblk,
 							 max - i, &action);
-			xfs_da_buf_done(addblk->bp);
 			addblk->bp = NULL;
 			if (error)
 				return(error);	/* GROT: dir is inconsistent */
@@ -221,13 +220,6 @@ xfs_da_split(xfs_da_state_t *state)
 		 * Update the btree to show the new hashval for this child.
 		 */
 		xfs_da_fixhashpath(state, &state->path);
-		/*
-		 * If we won't need this block again, it's getting dropped
-		 * from the active path by the loop control, so we need
-		 * to mark it done now.
-		 */
-		if (i > 0 || !addblk)
-			xfs_da_buf_done(oldblk->bp);
 	}
 	if (!addblk)
 		return(0);
@@ -239,8 +231,6 @@ xfs_da_split(xfs_da_state_t *state)
 	oldblk = &state->path.blk[0];
 	error = xfs_da_root_split(state, oldblk, addblk);
 	if (error) {
-		xfs_da_buf_done(oldblk->bp);
-		xfs_da_buf_done(addblk->bp);
 		addblk->bp = NULL;
 		return(error);	/* GROT: dir is inconsistent */
 	}
@@ -252,7 +242,7 @@ xfs_da_split(xfs_da_state_t *state)
 	 * and the original block 0 could be at any position in the list.
 	 */
 
-	node = oldblk->bp->data;
+	node = oldblk->bp->b_addr;
 	if (node->hdr.info.forw) {
 		if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) {
 			bp = addblk->bp;
@@ -260,13 +250,13 @@ xfs_da_split(xfs_da_state_t *state)
 			ASSERT(state->extravalid);
 			bp = state->extrablk.bp;
 		}
-		node = bp->data;
+		node = bp->b_addr;
 		node->hdr.info.back = cpu_to_be32(oldblk->blkno);
-		xfs_da_log_buf(state->args->trans, bp,
+		xfs_trans_log_buf(state->args->trans, bp,
 		    XFS_DA_LOGRANGE(node, &node->hdr.info,
 		    sizeof(node->hdr.info)));
 	}
-	node = oldblk->bp->data;
+	node = oldblk->bp->b_addr;
 	if (node->hdr.info.back) {
 		if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) {
 			bp = addblk->bp;
@@ -274,14 +264,12 @@ xfs_da_split(xfs_da_state_t *state)
 			ASSERT(state->extravalid);
 			bp = state->extrablk.bp;
 		}
-		node = bp->data;
+		node = bp->b_addr;
 		node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
-		xfs_da_log_buf(state->args->trans, bp,
+		xfs_trans_log_buf(state->args->trans, bp,
 		    XFS_DA_LOGRANGE(node, &node->hdr.info,
 		    sizeof(node->hdr.info)));
 	}
-	xfs_da_buf_done(oldblk->bp);
-	xfs_da_buf_done(addblk->bp);
 	addblk->bp = NULL;
 	return(0);
 }
@@ -298,7 +286,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	xfs_da_intnode_t *node, *oldroot;
 	xfs_da_args_t *args;
 	xfs_dablk_t blkno;
-	xfs_dabuf_t *bp;
+	struct xfs_buf *bp;
 	int error, size;
 	xfs_inode_t *dp;
 	xfs_trans_t *tp;
@@ -323,8 +311,8 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	if (error)
 		return(error);
 	ASSERT(bp != NULL);
-	node = bp->data;
-	oldroot = blk1->bp->data;
+	node = bp->b_addr;
+	oldroot = blk1->bp->b_addr;
 	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
 		size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] -
 			     (char *)oldroot);
@@ -335,8 +323,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 			     (char *)leaf);
 	}
 	memcpy(node, oldroot, size);
-	xfs_da_log_buf(tp, bp, 0, size - 1);
-	xfs_da_buf_done(blk1->bp);
+	xfs_trans_log_buf(tp, bp, 0, size - 1);
 	blk1->bp = bp;
 	blk1->blkno = blkno;
 
@@ -348,7 +335,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork);
 	if (error)
 		return(error);
-	node = bp->data;
+	node = bp->b_addr;
 	node->btree[0].hashval = cpu_to_be32(blk1->hashval);
 	node->btree[0].before = cpu_to_be32(blk1->blkno);
 	node->btree[1].hashval = cpu_to_be32(blk2->hashval);
@@ -365,10 +352,9 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 #endif
 
 	/* Header is already logged by xfs_da_node_create */
-	xfs_da_log_buf(tp, bp,
+	xfs_trans_log_buf(tp, bp,
 		XFS_DA_LOGRANGE(node, node->btree,
 			sizeof(xfs_da_node_entry_t) * 2));
-	xfs_da_buf_done(bp);
 
 	return(0);
 }
@@ -389,7 +375,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 
 	trace_xfs_da_node_split(state->args);
 
-	node = oldblk->bp->data;
+	node = oldblk->bp->b_addr;
 	ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
 
 	/*
@@ -436,7 +422,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 	 *
 	 * If we had double-split op below us, then add the extra block too.
 	 */
-	node = oldblk->bp->data;
+	node = oldblk->bp->b_addr;
 	if (oldblk->index <= be16_to_cpu(node->hdr.count)) {
 		oldblk->index++;
 		xfs_da_node_add(state, oldblk, addblk);
@@ -477,8 +463,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 
 	trace_xfs_da_node_rebalance(state->args);
 
-	node1 = blk1->bp->data;
-	node2 = blk2->bp->data;
+	node1 = blk1->bp->b_addr;
+	node2 = blk2->bp->b_addr;
 	/*
 	 * Figure out how many entries need to move, and in which direction.
 	 * Swap the nodes around if that makes it simpler.
@@ -532,7 +518,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		btree_d = &node1->btree[be16_to_cpu(node1->hdr.count)];
 		memcpy(btree_d, btree_s, tmp);
 		be16_add_cpu(&node1->hdr.count, count);
-		xfs_da_log_buf(tp, blk1->bp,
+		xfs_trans_log_buf(tp, blk1->bp,
 			XFS_DA_LOGRANGE(node1, btree_d, tmp));
 
 		/*
@@ -549,9 +535,9 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	/*
 	 * Log header of node 1 and all current bits of node 2.
 	 */
-	xfs_da_log_buf(tp, blk1->bp,
+	xfs_trans_log_buf(tp, blk1->bp,
 		XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr)));
-	xfs_da_log_buf(tp, blk2->bp,
+	xfs_trans_log_buf(tp, blk2->bp,
 		XFS_DA_LOGRANGE(node2, &node2->hdr,
 			sizeof(node2->hdr) +
 			sizeof(node2->btree[0]) * be16_to_cpu(node2->hdr.count)));
@@ -560,8 +546,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	 * Record the last hashval from each block for upward propagation.
 	 * (note: don't use the swapped node pointers)
 	 */
-	node1 = blk1->bp->data;
-	node2 = blk2->bp->data;
+	node1 = blk1->bp->b_addr;
+	node2 = blk2->bp->b_addr;
 	blk1->hashval = be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval);
 	blk2->hashval = be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval);
 
@@ -587,7 +573,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 
 	trace_xfs_da_node_add(state->args);
 
-	node = oldblk->bp->data;
+	node = oldblk->bp->b_addr;
 	ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
 	ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
 	ASSERT(newblk->blkno != 0);
@@ -606,10 +592,10 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 	}
 	btree->hashval = cpu_to_be32(newblk->hashval);
 	btree->before = cpu_to_be32(newblk->blkno);
-	xfs_da_log_buf(state->args->trans, oldblk->bp,
+	xfs_trans_log_buf(state->args->trans, oldblk->bp,
 		XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));
 	be16_add_cpu(&node->hdr.count, 1);
-	xfs_da_log_buf(state->args->trans, oldblk->bp,
+	xfs_trans_log_buf(state->args->trans, oldblk->bp,
 		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
 
 	/*
@@ -735,7 +721,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
 	xfs_da_intnode_t *oldroot;
 	xfs_da_args_t *args;
 	xfs_dablk_t child;
-	xfs_dabuf_t *bp;
+	struct xfs_buf *bp;
 	int error;
 
 	trace_xfs_da_root_join(state->args);
@@ -743,7 +729,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
 	args = state->args;
 	ASSERT(args != NULL);
 	ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
-	oldroot = root_blk->bp->data;
+	oldroot = root_blk->bp->b_addr;
 	ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
 	ASSERT(!oldroot->hdr.info.forw);
 	ASSERT(!oldroot->hdr.info.back);
@@ -765,11 +751,11 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
 	if (error)
 		return(error);
 	ASSERT(bp != NULL);
-	xfs_da_blkinfo_onlychild_validate(bp->data,
+	xfs_da_blkinfo_onlychild_validate(bp->b_addr,
 					be16_to_cpu(oldroot->hdr.level));
 
-	memcpy(root_blk->bp->data, bp->data, state->blocksize);
-	xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
+	memcpy(root_blk->bp->b_addr, bp->b_addr, state->blocksize);
+	xfs_trans_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
 	error = xfs_da_shrink_inode(args, child, bp);
 	return(error);
 }
@@ -791,7 +777,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
 	xfs_da_blkinfo_t *info;
 	int count, forward, error, retval, i;
 	xfs_dablk_t blkno;
-	xfs_dabuf_t *bp;
+	struct xfs_buf *bp;
 
 	/*
 	 * Check for the degenerate case of the block being over 50% full.
@@ -799,7 +785,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
 	 * to coalesce with a sibling.
 	 */
 	blk = &state->path.blk[ state->path.active-1 ];
-	info = blk->bp->data;
+	info = blk->bp->b_addr;
 	ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
 	node = (xfs_da_intnode_t *)info;
 	count = be16_to_cpu(node->hdr.count);
@@ -859,10 +845,10 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
 		count  = state->node_ents;
 		count -= state->node_ents >> 2;
 		count -= be16_to_cpu(node->hdr.count);
-		node = bp->data;
+		node = bp->b_addr;
 		ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
 		count -= be16_to_cpu(node->hdr.count);
-		xfs_da_brelse(state->args->trans, bp);
+		xfs_trans_brelse(state->args->trans, bp);
 		if (count >= 0)
 			break;	/* fits with at least 25% to spare */
 	}
@@ -934,14 +920,14 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
 		break;
 	}
 	for (blk--, level--; level >= 0; blk--, level--) {
-		node = blk->bp->data;
+		node = blk->bp->b_addr;
 		ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
 		btree = &node->btree[ blk->index ];
 		if (be32_to_cpu(btree->hashval) == lasthash)
 			break;
 		blk->hashval = lasthash;
 		btree->hashval = cpu_to_be32(lasthash);
-		xfs_da_log_buf(state->args->trans, blk->bp,
+		xfs_trans_log_buf(state->args->trans, blk->bp,
 				  XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
 
 		lasthash = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
@@ -960,7 +946,7 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
 
 	trace_xfs_da_node_remove(state->args);
 
-	node = drop_blk->bp->data;
+	node = drop_blk->bp->b_addr;
 	ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count));
 	ASSERT(drop_blk->index >= 0);
 
@@ -972,15 +958,15 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
 		tmp  = be16_to_cpu(node->hdr.count) - drop_blk->index - 1;
 		tmp *= (uint)sizeof(xfs_da_node_entry_t);
 		memmove(btree, btree + 1, tmp);
-		xfs_da_log_buf(state->args->trans, drop_blk->bp,
+		xfs_trans_log_buf(state->args->trans, drop_blk->bp,
 		    XFS_DA_LOGRANGE(node, btree, tmp));
 		btree = &node->btree[be16_to_cpu(node->hdr.count)-1];
 	}
 	memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
-	xfs_da_log_buf(state->args->trans, drop_blk->bp,
+	xfs_trans_log_buf(state->args->trans, drop_blk->bp,
 	    XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
 	be16_add_cpu(&node->hdr.count, -1);
-	xfs_da_log_buf(state->args->trans, drop_blk->bp,
+	xfs_trans_log_buf(state->args->trans, drop_blk->bp,
 	    XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
 
 	/*
@@ -1005,8 +991,8 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 
 	trace_xfs_da_node_unbalance(state->args);
 
-	drop_node = drop_blk->bp->data;
-	save_node = save_blk->bp->data;
+	drop_node = drop_blk->bp->b_addr;
+	save_node = save_blk->bp->b_addr;
 	ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
 	ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
 	tp = state->args->trans;
@@ -1023,13 +1009,13 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 		tmp = be16_to_cpu(save_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
 		memmove(btree, &save_node->btree[0], tmp);
 		btree = &save_node->btree[0];
-		xfs_da_log_buf(tp, save_blk->bp,
+		xfs_trans_log_buf(tp, save_blk->bp,
 			XFS_DA_LOGRANGE(save_node, btree,
 				(be16_to_cpu(save_node->hdr.count) + be16_to_cpu(drop_node->hdr.count)) *
 				sizeof(xfs_da_node_entry_t)));
 	} else {
 		btree = &save_node->btree[be16_to_cpu(save_node->hdr.count)];
-		xfs_da_log_buf(tp, save_blk->bp,
+		xfs_trans_log_buf(tp, save_blk->bp,
 			XFS_DA_LOGRANGE(save_node, btree,
 				be16_to_cpu(drop_node->hdr.count) *
 				sizeof(xfs_da_node_entry_t)));
@@ -1042,7 +1028,7 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	memcpy(btree, &drop_node->btree[0], tmp);
 	be16_add_cpu(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count));
 
-	xfs_da_log_buf(tp, save_blk->bp,
+	xfs_trans_log_buf(tp, save_blk->bp,
 		XFS_DA_LOGRANGE(save_node, &save_node->hdr,
 			sizeof(save_node->hdr)));
 
@@ -1100,7 +1086,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
 			state->path.active--;
 			return(error);
 		}
-		curr = blk->bp->data;
+		curr = blk->bp->b_addr;
 		blk->magic = be16_to_cpu(curr->magic);
 		ASSERT(blk->magic == XFS_DA_NODE_MAGIC ||
 		       blk->magic == XFS_DIR2_LEAFN_MAGIC ||
@@ -1110,7 +1096,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
 		 * Search an intermediate node for a match.
 		 */
 		if (blk->magic == XFS_DA_NODE_MAGIC) {
-			node = blk->bp->data;
+			node = blk->bp->b_addr;
 			max = be16_to_cpu(node->hdr.count);
 			blk->hashval = be32_to_cpu(node->btree[max-1].hashval);
 
@@ -1216,15 +1202,15 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
 	xfs_da_blkinfo_t *old_info, *new_info, *tmp_info;
 	xfs_da_args_t *args;
 	int before=0, error;
-	xfs_dabuf_t *bp;
+	struct xfs_buf *bp;
 
 	/*
 	 * Set up environment.
 	 */
 	args = state->args;
 	ASSERT(args != NULL);
-	old_info = old_blk->bp->data;
-	new_info = new_blk->bp->data;
+	old_info = old_blk->bp->b_addr;
+	new_info = new_blk->bp->b_addr;
 	ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
 	       old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
 	       old_blk->magic == XFS_ATTR_LEAF_MAGIC);
@@ -1261,12 +1247,11 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
 			if (error)
 				return(error);
 			ASSERT(bp != NULL);
-			tmp_info = bp->data;
+			tmp_info = bp->b_addr;
 			ASSERT(be16_to_cpu(tmp_info->magic) == be16_to_cpu(old_info->magic));
 			ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
 			tmp_info->forw = cpu_to_be32(new_blk->blkno);
-			xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
-			xfs_da_buf_done(bp);
+			xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
 		}
 		old_info->back = cpu_to_be32(new_blk->blkno);
 	} else {
@@ -1283,18 +1268,17 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
 			if (error)
 				return(error);
 			ASSERT(bp != NULL);
-			tmp_info = bp->data;
+			tmp_info = bp->b_addr;
 			ASSERT(tmp_info->magic == old_info->magic);
 			ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
 			tmp_info->back = cpu_to_be32(new_blk->blkno);
-			xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
-			xfs_da_buf_done(bp);
+			xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
 		}
 		old_info->forw = cpu_to_be32(new_blk->blkno);
 	}
 
-	xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
-	xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
+	xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
+	xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
 	return(0);
 }
 
@@ -1302,12 +1286,14 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
  * Compare two intermediate nodes for "order".
  */
 STATIC int
-xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
+xfs_da_node_order(
+	struct xfs_buf	*node1_bp,
+	struct xfs_buf	*node2_bp)
 {
 	xfs_da_intnode_t *node1, *node2;
 
-	node1 = node1_bp->data;
-	node2 = node2_bp->data;
+	node1 = node1_bp->b_addr;
+	node2 = node2_bp->b_addr;
 	ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) &&
 	       node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
 	if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
@@ -1324,11 +1310,13 @@ xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
  * Pick up the last hashvalue from an intermediate node.
  */
 STATIC uint
-xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
+xfs_da_node_lasthash(
+	struct xfs_buf	*bp,
+	int		*count)
 {
 	xfs_da_intnode_t *node;
 
-	node = bp->data;
+	node = bp->b_addr;
 	ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
 	if (count)
 		*count = be16_to_cpu(node->hdr.count);
@@ -1346,7 +1334,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 {
 	xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info;
 	xfs_da_args_t *args;
-	xfs_dabuf_t *bp;
+	struct xfs_buf *bp;
 	int error;
 
 	/*
@@ -1354,8 +1342,8 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	 */
 	args = state->args;
 	ASSERT(args != NULL);
-	save_info = save_blk->bp->data;
-	drop_info = drop_blk->bp->data;
+	save_info = save_blk->bp->b_addr;
+	drop_info = drop_blk->bp->b_addr;
 	ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
 	       save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
 	       save_blk->magic == XFS_ATTR_LEAF_MAGIC);
@@ -1380,13 +1368,12 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 			if (error)
 				return(error);
 			ASSERT(bp != NULL);
-			tmp_info = bp->data;
+			tmp_info = bp->b_addr;
 			ASSERT(tmp_info->magic == save_info->magic);
 			ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
 			tmp_info->forw = cpu_to_be32(save_blk->blkno);
-			xfs_da_log_buf(args->trans, bp, 0,
+			xfs_trans_log_buf(args->trans, bp, 0,
 						    sizeof(*tmp_info) - 1);
-			xfs_da_buf_done(bp);
 		}
 	} else {
 		trace_xfs_da_unlink_forward(args);
@@ -1398,17 +1385,16 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 			if (error)
 				return(error);
 			ASSERT(bp != NULL);
-			tmp_info = bp->data;
+			tmp_info = bp->b_addr;
 			ASSERT(tmp_info->magic == save_info->magic);
 			ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
 			tmp_info->back = cpu_to_be32(save_blk->blkno);
-			xfs_da_log_buf(args->trans, bp, 0,
+			xfs_trans_log_buf(args->trans, bp, 0,
 						    sizeof(*tmp_info) - 1);
-			xfs_da_buf_done(bp);
 		}
 	}
 
-	xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
+	xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
 	return(0);
 }
 
@@ -1443,7 +1429,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
 	level = (path->active-1) - 1;	/* skip bottom layer in path */
 	for (blk = &path->blk[level]; level >= 0; blk--, level--) {
 		ASSERT(blk->bp != NULL);
-		node = blk->bp->data;
+		node = blk->bp->b_addr;
 		ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
 		if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) {
 			blk->index++;
@@ -1471,7 +1457,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
 		 * (if it's dirty, trans won't actually let go)
 		 */
 		if (release)
-			xfs_da_brelse(args->trans, blk->bp);
+			xfs_trans_brelse(args->trans, blk->bp);
 
 		/*
 		 * Read the next child block.
@@ -1482,7 +1468,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
 		if (error)
 			return(error);
 		ASSERT(blk->bp != NULL);
-		info = blk->bp->data;
+		info = blk->bp->b_addr;
 		ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
 		       info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
 		       info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
@@ -1702,11 +1688,13 @@ xfs_da_grow_inode(
  * a bmap btree split to do that.
  */
 STATIC int
-xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
-		      xfs_dabuf_t **dead_bufp)
+xfs_da_swap_lastblock(
+	xfs_da_args_t	*args,
+	xfs_dablk_t	*dead_blknop,
+	struct xfs_buf	**dead_bufp)
 {
 	xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno;
-	xfs_dabuf_t *dead_buf, *last_buf, *sib_buf, *par_buf;
+	struct xfs_buf *dead_buf, *last_buf, *sib_buf, *par_buf;
 	xfs_fileoff_t lastoff;
 	xfs_inode_t *ip;
 	xfs_trans_t *tp;
@@ -1744,9 +1732,9 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 	/*
 	 * Copy the last block into the dead buffer and log it.
 	 */
-	memcpy(dead_buf->data, last_buf->data, mp->m_dirblksize);
-	xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
-	dead_info = dead_buf->data;
+	memcpy(dead_buf->b_addr, last_buf->b_addr, mp->m_dirblksize);
+	xfs_trans_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
+	dead_info = dead_buf->b_addr;
 	/*
 	 * Get values from the moved block.
 	 */
@@ -1767,7 +1755,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 	if ((sib_blkno = be32_to_cpu(dead_info->back))) {
 		if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
 			goto done;
-		sib_info = sib_buf->data;
+		sib_info = sib_buf->b_addr;
 		if (unlikely(
 		    be32_to_cpu(sib_info->forw) != last_blkno ||
 		    sib_info->magic != dead_info->magic)) {
@@ -1777,10 +1765,9 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 			goto done;
 		}
 		sib_info->forw = cpu_to_be32(dead_blkno);
-		xfs_da_log_buf(tp, sib_buf,
+		xfs_trans_log_buf(tp, sib_buf,
 			XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
 					sizeof(sib_info->forw)));
-		xfs_da_buf_done(sib_buf);
 		sib_buf = NULL;
 	}
 	/*
@@ -1789,7 +1776,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 	if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
 		if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
 			goto done;
-		sib_info = sib_buf->data;
+		sib_info = sib_buf->b_addr;
 		if (unlikely(
 		       be32_to_cpu(sib_info->back) != last_blkno ||
 		       sib_info->magic != dead_info->magic)) {
@@ -1799,10 +1786,9 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 			goto done;
 		}
 		sib_info->back = cpu_to_be32(dead_blkno);
-		xfs_da_log_buf(tp, sib_buf,
+		xfs_trans_log_buf(tp, sib_buf,
 			XFS_DA_LOGRANGE(sib_info, &sib_info->back,
 					sizeof(sib_info->back)));
-		xfs_da_buf_done(sib_buf);
 		sib_buf = NULL;
 	}
 	par_blkno = mp->m_dirleafblk;
@@ -1813,7 +1799,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 	for (;;) {
 		if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
 			goto done;
-		par_node = par_buf->data;
+		par_node = par_buf->b_addr;
 		if (unlikely(par_node->hdr.info.magic !=
 		    cpu_to_be16(XFS_DA_NODE_MAGIC) ||
 		    (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) {
@@ -1837,7 +1823,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 		par_blkno = be32_to_cpu(par_node->btree[entno].before);
 		if (level == dead_level + 1)
 			break;
-		xfs_da_brelse(tp, par_buf);
+		xfs_trans_brelse(tp, par_buf);
 		par_buf = NULL;
 	}
 	/*
@@ -1853,7 +1839,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 		if (entno < be16_to_cpu(par_node->hdr.count))
 			break;
 		par_blkno = be32_to_cpu(par_node->hdr.info.forw);
-		xfs_da_brelse(tp, par_buf);
+		xfs_trans_brelse(tp, par_buf);
 		par_buf = NULL;
 		if (unlikely(par_blkno == 0)) {
 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
@@ -1863,7 +1849,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 		}
 		if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
 			goto done;
-		par_node = par_buf->data;
+		par_node = par_buf->b_addr;
 		if (unlikely(
 		    be16_to_cpu(par_node->hdr.level) != level ||
 		    par_node->hdr.info.magic != cpu_to_be16(XFS_DA_NODE_MAGIC))) {
@@ -1878,20 +1864,18 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 	 * Update the parent entry pointing to the moved block.
 	 */
 	par_node->btree[entno].before = cpu_to_be32(dead_blkno);
-	xfs_da_log_buf(tp, par_buf,
+	xfs_trans_log_buf(tp, par_buf,
 		XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before,
 				sizeof(par_node->btree[entno].before)));
-	xfs_da_buf_done(par_buf);
-	xfs_da_buf_done(dead_buf);
 	*dead_blknop = last_blkno;
 	*dead_bufp = last_buf;
 	return 0;
 done:
 	if (par_buf)
-		xfs_da_brelse(tp, par_buf);
+		xfs_trans_brelse(tp, par_buf);
 	if (sib_buf)
-		xfs_da_brelse(tp, sib_buf);
-	xfs_da_brelse(tp, last_buf);
+		xfs_trans_brelse(tp, sib_buf);
+	xfs_trans_brelse(tp, last_buf);
 	return error;
 }
 
@@ -1899,8 +1883,10 @@ done:
  * Remove a btree block from a directory or attribute.
  */
 int
-xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
-		    xfs_dabuf_t *dead_buf)
+xfs_da_shrink_inode(
+	xfs_da_args_t	*args,
+	xfs_dablk_t	dead_blkno,
+	struct xfs_buf	*dead_buf)
 {
 	xfs_inode_t *dp;
 	int done, error, w, count;
@@ -1935,7 +1921,7 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
 			break;
 		}
 	}
-	xfs_da_binval(tp, dead_buf);
+	xfs_trans_binval(tp, dead_buf);
 	return error;
 }
 
@@ -2099,7 +2085,7 @@ xfs_da_get_buf(
 	struct xfs_inode	*dp,
 	xfs_dablk_t		bno,
 	xfs_daddr_t		mappedbno,
-	xfs_dabuf_t		**bpp,
+	struct xfs_buf		**bpp,
 	int			whichfork)
 {
 	struct xfs_buf		*bp;
@@ -2128,7 +2114,7 @@ xfs_da_get_buf(
 		goto out_free;
 	}
 
-	*bpp = xfs_da_buf_make(bp);
+	*bpp = bp;
 
 out_free:
 	if (mapp != &map)
@@ -2146,7 +2132,7 @@ xfs_da_read_buf(
 	struct xfs_inode	*dp,
 	xfs_dablk_t		bno,
 	xfs_daddr_t		mappedbno,
-	xfs_dabuf_t		**bpp,
+	struct xfs_buf		**bpp,
 	int			whichfork)
 {
 	struct xfs_buf		*bp;
@@ -2178,16 +2164,14 @@ xfs_da_read_buf(
 	else
 		xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
 
-	*bpp = xfs_da_buf_make(bp);
-
 	/*
 	 * This verification code will be moved to a CRC verification callback
 	 * function so just leave it here unchanged until then.
 	 */
 	{
-		xfs_dir2_data_hdr_t	*hdr = (*bpp)->data;
-		xfs_dir2_free_t		*free = (*bpp)->data;
-		xfs_da_blkinfo_t	*info = (*bpp)->data;
+		xfs_dir2_data_hdr_t	*hdr = bp->b_addr;
+		xfs_dir2_free_t		*free = bp->b_addr;
+		xfs_da_blkinfo_t	*info = bp->b_addr;
 		uint			magic, magic1;
 		struct xfs_mount	*mp = dp->i_mount;
 
@@ -2207,11 +2191,11 @@ xfs_da_read_buf(
 			XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
 					     XFS_ERRLEVEL_LOW, mp, info);
 			error = XFS_ERROR(EFSCORRUPTED);
-			xfs_da_brelse(trans, *bpp);
+			xfs_trans_brelse(trans, bp);
 			goto out_free;
 		}
 	}
-
+	*bpp = bp;
 out_free:
 	if (mapp != &map)
 		kmem_free(mapp);
@@ -2259,7 +2243,6 @@ out_free:
 }
 
 kmem_zone_t *xfs_da_state_zone;	/* anchor for state struct zone */
-kmem_zone_t *xfs_dabuf_zone;		/* dabuf zone */
 
 /*
  * Allocate a dir-state structure.
@@ -2279,13 +2262,8 @@ xfs_da_state_kill_altpath(xfs_da_state_t *state)
 {
 	int	i;
 
-	for (i = 0; i < state->altpath.active; i++) {
-		if (state->altpath.blk[i].bp) {
-			if (state->altpath.blk[i].bp != state->path.blk[i].bp)
-				xfs_da_buf_done(state->altpath.blk[i].bp);
-			state->altpath.blk[i].bp = NULL;
-		}
-	}
+	for (i = 0; i < state->altpath.active; i++)
+		state->altpath.blk[i].bp = NULL;
 	state->altpath.active = 0;
 }
 
@@ -2295,88 +2273,9 @@ xfs_da_state_kill_altpath(xfs_da_state_t *state)
 void
 xfs_da_state_free(xfs_da_state_t *state)
 {
-	int	i;
-
 	xfs_da_state_kill_altpath(state);
-	for (i = 0; i < state->path.active; i++) {
-		if (state->path.blk[i].bp)
-			xfs_da_buf_done(state->path.blk[i].bp);
-	}
-	if (state->extravalid && state->extrablk.bp)
-		xfs_da_buf_done(state->extrablk.bp);
 #ifdef DEBUG
 	memset((char *)state, 0, sizeof(*state));
 #endif /* DEBUG */
 	kmem_zone_free(xfs_da_state_zone, state);
 }
-
-/*
- * Create a dabuf.
- */
-/* ARGSUSED */
-STATIC xfs_dabuf_t *
-xfs_da_buf_make(xfs_buf_t *bp)
-{
-	xfs_dabuf_t	*dabuf;
-
-	dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS);
-	dabuf->bbcount = bp->b_length;
-	dabuf->data = bp->b_addr;
-	dabuf->bp = bp;
-	return dabuf;
-}
-
-/*
- * Release a dabuf.
- */
-void
-xfs_da_buf_done(xfs_dabuf_t *dabuf)
-{
-	ASSERT(dabuf->data && dabuf->bbcount && dabuf->bp);
-	kmem_zone_free(xfs_dabuf_zone, dabuf);
-}
-
-/*
- * Log transaction from a dabuf.
- */
-void
-xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
-{
-	ASSERT(dabuf->data && dabuf->bbcount && dabuf->bp);
-	ASSERT(dabuf->data == dabuf->bp->b_addr);
-	xfs_trans_log_buf(tp, dabuf->bp, first, last);
-}
-
-/*
- * Release dabuf from a transaction.
- * Have to free up the dabuf before the buffers are released,
- * since the synchronization on the dabuf is really the lock on the buffer.
- */
-void
-xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
-{
-	ASSERT(dabuf->data && dabuf->bbcount && dabuf->bp);
-	xfs_trans_brelse(tp, dabuf->bp);
-	xfs_da_buf_done(dabuf);
-}
-
-/*
- * Invalidate dabuf from a transaction.
- */
-void
-xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
-{
-	ASSERT(dabuf->data && dabuf->bbcount && dabuf->bp);
-	xfs_da_buf_done(dabuf);
-	xfs_trans_binval(tp, dabuf->bp);
-}
-
-/*
- * Get the first daddr from a dabuf.
- */
-xfs_daddr_t
-xfs_da_blkno(xfs_dabuf_t *dabuf)
-{
-	ASSERT(dabuf->data);
-	return XFS_BUF_ADDR(dabuf->bp);
-}
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 0b64c4a..9f37aa0 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -133,20 +133,6 @@ typedef struct xfs_da_args {
 	{ XFS_DA_OP_CILOOKUP,	"CILOOKUP" }
 
 /*
- * Structure to describe buffer(s) for a block.
- * This is needed in the directory version 2 format case, when
- * multiple non-contiguous fsblocks might be needed to cover one
- * logical directory block.
- * If the buffer count is 1 then the data pointer points to the
- * same place as the b_addr field for the buffer, else to kmem_alloced memory.
- */
-typedef struct xfs_dabuf {
-	short		bbcount;	/* how large is data in bbs */
-	void		*data;		/* pointer for buffers' data */
-	struct xfs_buf	*bp;		/* actually nbuf of these */
-} xfs_dabuf_t;
-
-/*
  * Storage for holding state during Btree searches and split/join ops.
  *
  * Only need space for 5 intermediate nodes.  With a minimum of 62-way
@@ -154,7 +140,7 @@ typedef struct xfs_dabuf {
  * which is slightly more than enough.
  */
 typedef struct xfs_da_state_blk {
-	xfs_dabuf_t	*bp;		/* buffer containing block */
+	struct xfs_buf	*bp;		/* buffer containing block */
 	xfs_dablk_t	blkno;		/* filesystem blkno of buffer */
 	xfs_daddr_t	disk_blkno;	/* on-disk blkno (in BBs) of buffer */
 	int		index;		/* relevant index into block */
@@ -207,7 +193,7 @@ struct xfs_nameops {
  * Routines used for growing the Btree.
  */
 int	xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
-					 xfs_dabuf_t **bpp, int whichfork);
+					 struct xfs_buf **bpp, int whichfork);
 int	xfs_da_split(xfs_da_state_t *state);
 
 /*
@@ -237,14 +223,14 @@ int	xfs_da_grow_inode_int(struct xfs_da_args *args, xfs_fileoff_t *bno,
 			      int count);
 int	xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp,
 			      xfs_dablk_t bno, xfs_daddr_t mappedbno,
-			      xfs_dabuf_t **bp, int whichfork);
+			      struct xfs_buf **bp, int whichfork);
 int	xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp,
 			       xfs_dablk_t bno, xfs_daddr_t mappedbno,
-			       xfs_dabuf_t **bpp, int whichfork);
+			       struct xfs_buf **bpp, int whichfork);
 xfs_daddr_t	xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp,
 			xfs_dablk_t bno, int whichfork);
 int	xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
-					  xfs_dabuf_t *dead_buf);
+					  struct xfs_buf *dead_buf);
 
 uint xfs_da_hashname(const __uint8_t *name_string, int name_length);
 enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
@@ -254,15 +240,7 @@ enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
 xfs_da_state_t *xfs_da_state_alloc(void);
 void xfs_da_state_free(xfs_da_state_t *state);
 
-void xfs_da_buf_done(xfs_dabuf_t *dabuf);
-void xfs_da_log_buf(struct xfs_trans *tp, xfs_dabuf_t *dabuf, uint first,
-			   uint last);
-void xfs_da_brelse(struct xfs_trans *tp, xfs_dabuf_t *dabuf);
-void xfs_da_binval(struct xfs_trans *tp, xfs_dabuf_t *dabuf);
-xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf);
-
 extern struct kmem_zone *xfs_da_state_zone;
-extern struct kmem_zone *xfs_dabuf_zone;
 extern const struct xfs_nameops xfs_default_nameops;
 
 #endif	/* __XFS_DA_BTREE_H__ */
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 67a250c36..b26a50f 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -592,7 +592,7 @@ int
 xfs_dir2_shrink_inode(
 	xfs_da_args_t	*args,
 	xfs_dir2_db_t	db,
-	xfs_dabuf_t	*bp)
+	struct xfs_buf	*bp)
 {
 	xfs_fileoff_t	bno;		/* directory file offset */
 	xfs_dablk_t	da;		/* directory file offset */
@@ -634,7 +634,7 @@ xfs_dir2_shrink_inode(
 	/*
 	 * Invalidate the buffer from the transaction.
 	 */
-	xfs_da_binval(tp, bp);
+	xfs_trans_binval(tp, bp);
 	/*
 	 * If it's not a data block, we're done.
 	 */
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 586732f..e93ca8f 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -37,10 +37,10 @@
 /*
  * Local function prototypes.
  */
-static void xfs_dir2_block_log_leaf(xfs_trans_t *tp, xfs_dabuf_t *bp, int first,
-				    int last);
-static void xfs_dir2_block_log_tail(xfs_trans_t *tp, xfs_dabuf_t *bp);
-static int xfs_dir2_block_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **bpp,
+static void xfs_dir2_block_log_leaf(xfs_trans_t *tp, struct xfs_buf *bp,
+				    int first, int last);
+static void xfs_dir2_block_log_tail(xfs_trans_t *tp, struct xfs_buf *bp);
+static int xfs_dir2_block_lookup_int(xfs_da_args_t *args, struct xfs_buf **bpp,
 				     int *entno);
 static int xfs_dir2_block_sort(const void *a, const void *b);
 
@@ -66,7 +66,7 @@ xfs_dir2_block_addname(
 	xfs_dir2_data_free_t	*bf;		/* bestfree table in block */
 	xfs_dir2_data_hdr_t	*hdr;		/* block header */
 	xfs_dir2_leaf_entry_t	*blp;		/* block leaf entries */
-	xfs_dabuf_t		*bp;		/* buffer for block */
+	struct xfs_buf		*bp;		/* buffer for block */
 	xfs_dir2_block_tail_t	*btp;		/* block tail */
 	int			compact;	/* need to compact leaf ents */
 	xfs_dir2_data_entry_t	*dep;		/* block data entry */
@@ -102,14 +102,14 @@ xfs_dir2_block_addname(
 		return error;
 	}
 	ASSERT(bp != NULL);
-	hdr = bp->data;
+	hdr = bp->b_addr;
 	/*
 	 * Check the magic number, corrupted if wrong.
 	 */
 	if (unlikely(hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))) {
 		XFS_CORRUPTION_ERROR("xfs_dir2_block_addname",
 				     XFS_ERRLEVEL_LOW, mp, hdr);
-		xfs_da_brelse(tp, bp);
+		xfs_trans_brelse(tp, bp);
 		return XFS_ERROR(EFSCORRUPTED);
 	}
 	len = xfs_dir2_data_entsize(args->namelen);
@@ -212,7 +212,7 @@ xfs_dir2_block_addname(
 	 * If this isn't a real add, we're done with the buffer.
 	 */
 	if (args->op_flags & XFS_DA_OP_JUSTCHECK)
-		xfs_da_brelse(tp, bp);
+		xfs_trans_brelse(tp, bp);
 	/*
 	 * If we don't have space for the new entry & leaf ...
 	 */
@@ -228,7 +228,6 @@ xfs_dir2_block_addname(
 		 * Then add the new entry in that format.
 		 */
 		error = xfs_dir2_block_to_leaf(args, bp);
-		xfs_da_buf_done(bp);
 		if (error)
 			return error;
 		return xfs_dir2_leaf_addname(args);
@@ -422,7 +421,6 @@ xfs_dir2_block_addname(
 	xfs_dir2_block_log_tail(tp, bp);
 	xfs_dir2_data_log_entry(tp, bp, dep);
 	xfs_dir2_data_check(dp, bp);
-	xfs_da_buf_done(bp);
 	return 0;
 }
 
@@ -437,7 +435,7 @@ xfs_dir2_block_getdents(
 	filldir_t		filldir)
 {
 	xfs_dir2_data_hdr_t	*hdr;		/* block header */
-	xfs_dabuf_t		*bp;		/* buffer for block */
+	struct xfs_buf		*bp;		/* buffer for block */
 	xfs_dir2_block_tail_t	*btp;		/* block tail */
 	xfs_dir2_data_entry_t	*dep;		/* block data entry */
 	xfs_dir2_data_unused_t	*dup;		/* block unused entry */
@@ -469,7 +467,7 @@ xfs_dir2_block_getdents(
 	 * We'll skip entries before this.
 	 */
 	wantoff = xfs_dir2_dataptr_to_off(mp, *offset);
-	hdr = bp->data;
+	hdr = bp->b_addr;
 	xfs_dir2_data_check(dp, bp);
 	/*
 	 * Set up values for the loop.
@@ -514,7 +512,7 @@ xfs_dir2_block_getdents(
 			    cook & 0x7fffffff, be64_to_cpu(dep->inumber),
 			    DT_UNKNOWN)) {
 			*offset = cook & 0x7fffffff;
-			xfs_da_brelse(NULL, bp);
+			xfs_trans_brelse(NULL, bp);
 			return 0;
 		}
 	}
@@ -525,7 +523,7 @@ xfs_dir2_block_getdents(
 	 */
 	*offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &
 			0x7fffffff;
-	xfs_da_brelse(NULL, bp);
+	xfs_trans_brelse(NULL, bp);
 	return 0;
 }
 
@@ -535,17 +533,17 @@ xfs_dir2_block_getdents(
 static void
 xfs_dir2_block_log_leaf(
 	xfs_trans_t		*tp,		/* transaction structure */
-	xfs_dabuf_t		*bp,		/* block buffer */
+	struct xfs_buf		*bp,		/* block buffer */
 	int			first,		/* index of first logged leaf */
 	int			last)		/* index of last logged leaf */
 {
-	xfs_dir2_data_hdr_t	*hdr = bp->data;
+	xfs_dir2_data_hdr_t	*hdr = bp->b_addr;
 	xfs_dir2_leaf_entry_t	*blp;
 	xfs_dir2_block_tail_t	*btp;
 
 	btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
 	blp = xfs_dir2_block_leaf_p(btp);
-	xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr),
+	xfs_trans_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr),
 		(uint)((char *)&blp[last + 1] - (char *)hdr - 1));
 }
 
@@ -555,13 +553,13 @@ xfs_dir2_block_log_leaf(
 static void
 xfs_dir2_block_log_tail(
 	xfs_trans_t		*tp,		/* transaction structure */
-	xfs_dabuf_t		*bp)		/* block buffer */
+	struct xfs_buf		*bp)		/* block buffer */
 {
-	xfs_dir2_data_hdr_t	*hdr = bp->data;
+	xfs_dir2_data_hdr_t	*hdr = bp->b_addr;
 	xfs_dir2_block_tail_t	*btp;
 
 	btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
-	xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr),
+	xfs_trans_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr),
 		(uint)((char *)(btp + 1) - (char *)hdr - 1));
 }
 
@@ -575,7 +573,7 @@ xfs_dir2_block_lookup(
 {
 	xfs_dir2_data_hdr_t	*hdr;		/* block header */
 	xfs_dir2_leaf_entry_t	*blp;		/* block leaf entries */
-	xfs_dabuf_t		*bp;		/* block buffer */
+	struct xfs_buf		*bp;		/* block buffer */
 	xfs_dir2_block_tail_t	*btp;		/* block tail */
 	xfs_dir2_data_entry_t	*dep;		/* block data entry */
 	xfs_inode_t		*dp;		/* incore inode */
@@ -593,7 +591,7 @@ xfs_dir2_block_lookup(
 		return error;
 	dp = args->dp;
 	mp = dp->i_mount;
-	hdr = bp->data;
+	hdr = bp->b_addr;
 	xfs_dir2_data_check(dp, bp);
 	btp = xfs_dir2_block_tail_p(mp, hdr);
 	blp = xfs_dir2_block_leaf_p(btp);
@@ -607,7 +605,7 @@ xfs_dir2_block_lookup(
 	 */
 	args->inumber = be64_to_cpu(dep->inumber);
 	error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
-	xfs_da_brelse(args->trans, bp);
+	xfs_trans_brelse(args->trans, bp);
 	return XFS_ERROR(error);
 }
 
@@ -617,13 +615,13 @@ xfs_dir2_block_lookup(
 static int					/* error */
 xfs_dir2_block_lookup_int(
 	xfs_da_args_t		*args,		/* dir lookup arguments */
-	xfs_dabuf_t		**bpp,		/* returned block buffer */
+	struct xfs_buf		**bpp,		/* returned block buffer */
 	int			*entno)		/* returned entry number */
 {
 	xfs_dir2_dataptr_t	addr;		/* data entry address */
 	xfs_dir2_data_hdr_t	*hdr;		/* block header */
 	xfs_dir2_leaf_entry_t	*blp;		/* block leaf entries */
-	xfs_dabuf_t		*bp;		/* block buffer */
+	struct xfs_buf		*bp;		/* block buffer */
 	xfs_dir2_block_tail_t	*btp;		/* block tail */
 	xfs_dir2_data_entry_t	*dep;		/* block data entry */
 	xfs_inode_t		*dp;		/* incore inode */
@@ -647,7 +645,7 @@ xfs_dir2_block_lookup_int(
 		return error;
 	}
 	ASSERT(bp != NULL);
-	hdr = bp->data;
+	hdr = bp->b_addr;
 	xfs_dir2_data_check(dp, bp);
 	btp = xfs_dir2_block_tail_p(mp, hdr);
 	blp = xfs_dir2_block_leaf_p(btp);
@@ -666,7 +664,7 @@ xfs_dir2_block_lookup_int(
 			high = mid - 1;
 		if (low > high) {
 			ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
-			xfs_da_brelse(tp, bp);
+			xfs_trans_brelse(tp, bp);
 			return XFS_ERROR(ENOENT);
 		}
 	}
@@ -714,7 +712,7 @@ xfs_dir2_block_lookup_int(
 	/*
 	 * No match, release the buffer and return ENOENT.
 	 */
-	xfs_da_brelse(tp, bp);
+	xfs_trans_brelse(tp, bp);
 	return XFS_ERROR(ENOENT);
 }
 
@@ -728,7 +726,7 @@ xfs_dir2_block_removename(
 {
 	xfs_dir2_data_hdr_t	*hdr;		/* block header */
 	xfs_dir2_leaf_entry_t	*blp;		/* block leaf pointer */
-	xfs_dabuf_t		*bp;		/* block buffer */
+	struct xfs_buf		*bp;		/* block buffer */
 	xfs_dir2_block_tail_t	*btp;		/* block tail */
 	xfs_dir2_data_entry_t	*dep;		/* block data entry */
 	xfs_inode_t		*dp;		/* incore inode */
@@ -753,7 +751,7 @@ xfs_dir2_block_removename(
 	dp = args->dp;
 	tp = args->trans;
 	mp = dp->i_mount;
-	hdr = bp->data;
+	hdr = bp->b_addr;
 	btp = xfs_dir2_block_tail_p(mp, hdr);
 	blp = xfs_dir2_block_leaf_p(btp);
 	/*
@@ -790,10 +788,9 @@ xfs_dir2_block_removename(
 	 * See if the size as a shortform is good enough.
 	 */
 	size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
-	if (size > XFS_IFORK_DSIZE(dp)) {
-		xfs_da_buf_done(bp);
+	if (size > XFS_IFORK_DSIZE(dp))
 		return 0;
-	}
+
 	/*
 	 * If it works, do the conversion.
 	 */
@@ -810,7 +807,7 @@ xfs_dir2_block_replace(
 {
 	xfs_dir2_data_hdr_t	*hdr;		/* block header */
 	xfs_dir2_leaf_entry_t	*blp;		/* block leaf entries */
-	xfs_dabuf_t		*bp;		/* block buffer */
+	struct xfs_buf		*bp;		/* block buffer */
 	xfs_dir2_block_tail_t	*btp;		/* block tail */
 	xfs_dir2_data_entry_t	*dep;		/* block data entry */
 	xfs_inode_t		*dp;		/* incore inode */
@@ -829,7 +826,7 @@ xfs_dir2_block_replace(
 	}
 	dp = args->dp;
 	mp = dp->i_mount;
-	hdr = bp->data;
+	hdr = bp->b_addr;
 	btp = xfs_dir2_block_tail_p(mp, hdr);
 	blp = xfs_dir2_block_leaf_p(btp);
 	/*
@@ -844,7 +841,6 @@ xfs_dir2_block_replace(
 	dep->inumber = cpu_to_be64(args->inumber);
 	xfs_dir2_data_log_entry(args->trans, bp, dep);
 	xfs_dir2_data_check(dp, bp);
-	xfs_da_buf_done(bp);
 	return 0;
 }
 
@@ -871,8 +867,8 @@ xfs_dir2_block_sort(
 int						/* error */
 xfs_dir2_leaf_to_block(
 	xfs_da_args_t		*args,		/* operation arguments */
-	xfs_dabuf_t		*lbp,		/* leaf buffer */
-	xfs_dabuf_t		*dbp)		/* data buffer */
+	struct xfs_buf		*lbp,		/* leaf buffer */
+	struct xfs_buf		*dbp)		/* data buffer */
 {
 	__be16			*bestsp;	/* leaf bests table */
 	xfs_dir2_data_hdr_t	*hdr;		/* block header */
@@ -898,7 +894,7 @@ xfs_dir2_leaf_to_block(
 	dp = args->dp;
 	tp = args->trans;
 	mp = dp->i_mount;
-	leaf = lbp->data;
+	leaf = lbp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
 	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	/*
@@ -914,11 +910,9 @@ xfs_dir2_leaf_to_block(
 			if ((error =
 			    xfs_dir2_leaf_trim_data(args, lbp,
 				    (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1))))
-				goto out;
-		} else {
-			error = 0;
-			goto out;
-		}
+				return error;
+		} else
+			return 0;
 	}
 	/*
 	 * Read the data block if we don't already have it, give up if it fails.
@@ -926,9 +920,9 @@ xfs_dir2_leaf_to_block(
 	if (dbp == NULL &&
 	    (error = xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &dbp,
 		    XFS_DATA_FORK))) {
-		goto out;
+		return error;
 	}
-	hdr = dbp->data;
+	hdr = dbp->b_addr;
 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
 	/*
 	 * Size of the "leaf" area in the block.
@@ -944,10 +938,9 @@ xfs_dir2_leaf_to_block(
 	 * If it's not free or is too short we can't do it.
 	 */
 	if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG ||
-	    be16_to_cpu(dup->length) < size) {
-		error = 0;
-		goto out;
-	}
+	    be16_to_cpu(dup->length) < size)
+		return 0;
+
 	/*
 	 * Start converting it to block form.
 	 */
@@ -989,25 +982,17 @@ xfs_dir2_leaf_to_block(
 	 * Pitch the old leaf block.
 	 */
 	error = xfs_da_shrink_inode(args, mp->m_dirleafblk, lbp);
-	lbp = NULL;
-	if (error) {
-		goto out;
-	}
+	if (error)
+		return error;
+
 	/*
 	 * Now see if the resulting block can be shrunken to shortform.
 	 */
 	size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
-	if (size > XFS_IFORK_DSIZE(dp)) {
-		error = 0;
-		goto out;
-	}
+	if (size > XFS_IFORK_DSIZE(dp))
+		return 0;
+
 	return xfs_dir2_block_to_sf(args, dbp, size, &sfh);
-out:
-	if (lbp)
-		xfs_da_buf_done(lbp);
-	if (dbp)
-		xfs_da_buf_done(dbp);
-	return error;
 }
 
 /*
@@ -1020,7 +1005,7 @@ xfs_dir2_sf_to_block(
 	xfs_dir2_db_t		blkno;		/* dir-relative block # (0) */
 	xfs_dir2_data_hdr_t	*hdr;		/* block header */
 	xfs_dir2_leaf_entry_t	*blp;		/* block leaf entries */
-	xfs_dabuf_t		*bp;		/* block buffer */
+	struct xfs_buf		*bp;		/* block buffer */
 	xfs_dir2_block_tail_t	*btp;		/* block tail pointer */
 	xfs_dir2_data_entry_t	*dep;		/* data entry pointer */
 	xfs_inode_t		*dp;		/* incore directory inode */
@@ -1088,7 +1073,7 @@ xfs_dir2_sf_to_block(
 		kmem_free(sfp);
 		return error;
 	}
-	hdr = bp->data;
+	hdr = bp->b_addr;
 	hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
 	/*
 	 * Compute size of block "tail" area.
@@ -1217,6 +1202,5 @@ xfs_dir2_sf_to_block(
 	xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1);
 	xfs_dir2_block_log_tail(tp, bp);
 	xfs_dir2_data_check(dp, bp);
-	xfs_da_buf_done(bp);
 	return 0;
 }
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 2046988..44ffd4d 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -42,8 +42,8 @@ xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup);
  */
 void
 xfs_dir2_data_check(
-	xfs_inode_t		*dp,		/* incore inode pointer */
-	xfs_dabuf_t		*bp)		/* data block's buffer */
+	struct xfs_inode	*dp,		/* incore inode pointer */
+	struct xfs_buf		*bp)		/* data block's buffer */
 {
 	xfs_dir2_dataptr_t	addr;		/* addr for leaf lookup */
 	xfs_dir2_data_free_t	*bf;		/* bestfree table */
@@ -65,7 +65,7 @@ xfs_dir2_data_check(
 	struct xfs_name		name;
 
 	mp = dp->i_mount;
-	hdr = bp->data;
+	hdr = bp->b_addr;
 	bf = hdr->bestfree;
 	p = (char *)(hdr + 1);
 
@@ -389,9 +389,9 @@ int						/* error */
 xfs_dir2_data_init(
 	xfs_da_args_t		*args,		/* directory operation args */
 	xfs_dir2_db_t		blkno,		/* logical dir block number */
-	xfs_dabuf_t		**bpp)		/* output block buffer */
+	struct xfs_buf		**bpp)		/* output block buffer */
 {
-	xfs_dabuf_t		*bp;		/* block buffer */
+	struct xfs_buf		*bp;		/* block buffer */
 	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	xfs_dir2_data_unused_t	*dup;		/* unused entry pointer */
@@ -417,7 +417,7 @@ xfs_dir2_data_init(
 	/*
 	 * Initialize the header.
 	 */
-	hdr = bp->data;
+	hdr = bp->b_addr;
 	hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
 	hdr->bestfree[0].offset = cpu_to_be16(sizeof(*hdr));
 	for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
@@ -449,16 +449,16 @@ xfs_dir2_data_init(
  */
 void
 xfs_dir2_data_log_entry(
-	xfs_trans_t		*tp,		/* transaction pointer */
-	xfs_dabuf_t		*bp,		/* block buffer */
+	struct xfs_trans	*tp,
+	struct xfs_buf		*bp,
 	xfs_dir2_data_entry_t	*dep)		/* data entry pointer */
 {
-	xfs_dir2_data_hdr_t	*hdr = bp->data;
+	xfs_dir2_data_hdr_t	*hdr = bp->b_addr;
 
 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
 	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
 
-	xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr),
+	xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr),
 		(uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) -
 		       (char *)hdr - 1));
 }
@@ -468,15 +468,15 @@ xfs_dir2_data_log_entry(
  */
 void
 xfs_dir2_data_log_header(
-	xfs_trans_t		*tp,		/* transaction pointer */
-	xfs_dabuf_t		*bp)		/* block buffer */
+	struct xfs_trans	*tp,
+	struct xfs_buf		*bp)
 {
-	xfs_dir2_data_hdr_t	*hdr = bp->data;
+	xfs_dir2_data_hdr_t	*hdr = bp->b_addr;
 
 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
 	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
 
-	xfs_da_log_buf(tp, bp, 0, sizeof(*hdr) - 1);
+	xfs_trans_log_buf(tp, bp, 0, sizeof(*hdr) - 1);
 }
 
 /*
@@ -484,11 +484,11 @@ xfs_dir2_data_log_header(
  */
 void
 xfs_dir2_data_log_unused(
-	xfs_trans_t		*tp,		/* transaction pointer */
-	xfs_dabuf_t		*bp,		/* block buffer */
+	struct xfs_trans	*tp,
+	struct xfs_buf		*bp,
 	xfs_dir2_data_unused_t	*dup)		/* data unused pointer */
 {
-	xfs_dir2_data_hdr_t	*hdr = bp->data;
+	xfs_dir2_data_hdr_t	*hdr = bp->b_addr;
 
 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
 	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
@@ -496,13 +496,13 @@ xfs_dir2_data_log_unused(
 	/*
 	 * Log the first part of the unused entry.
 	 */
-	xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr),
+	xfs_trans_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr),
 		(uint)((char *)&dup->length + sizeof(dup->length) -
 		       1 - (char *)hdr));
 	/*
 	 * Log the end (tag) of the unused entry.
 	 */
-	xfs_da_log_buf(tp, bp,
+	xfs_trans_log_buf(tp, bp,
 		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr),
 		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr +
 		       sizeof(xfs_dir2_data_off_t) - 1));
@@ -514,8 +514,8 @@ xfs_dir2_data_log_unused(
  */
 void
 xfs_dir2_data_make_free(
-	xfs_trans_t		*tp,		/* transaction pointer */
-	xfs_dabuf_t		*bp,		/* block buffer */
+	struct xfs_trans	*tp,
+	struct xfs_buf		*bp,
 	xfs_dir2_data_aoff_t	offset,		/* starting byte offset */
 	xfs_dir2_data_aoff_t	len,		/* length in bytes */
 	int			*needlogp,	/* out: log header */
@@ -531,7 +531,7 @@ xfs_dir2_data_make_free(
 	xfs_dir2_data_unused_t	*prevdup;	/* unused entry before us */
 
 	mp = tp->t_mountp;
-	hdr = bp->data;
+	hdr = bp->b_addr;
 
 	/*
 	 * Figure out where the end of the data area is.
@@ -696,8 +696,8 @@ xfs_dir2_data_make_free(
  */
 void
 xfs_dir2_data_use_free(
-	xfs_trans_t		*tp,		/* transaction pointer */
-	xfs_dabuf_t		*bp,		/* data block buffer */
+	struct xfs_trans	*tp,
+	struct xfs_buf		*bp,
 	xfs_dir2_data_unused_t	*dup,		/* unused entry */
 	xfs_dir2_data_aoff_t	offset,		/* starting offset to use */
 	xfs_dir2_data_aoff_t	len,		/* length to use */
@@ -713,7 +713,7 @@ xfs_dir2_data_use_free(
 	xfs_dir2_data_unused_t	*newdup2;	/* another new unused entry */
 	int			oldlen;		/* old unused entry's length */
 
-	hdr = bp->data;
+	hdr = bp->b_addr;
 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
 	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
 	ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 397ffbc..69accf6 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -38,15 +38,15 @@
  * Local function declarations.
  */
 #ifdef DEBUG
-static void xfs_dir2_leaf_check(xfs_inode_t *dp, xfs_dabuf_t *bp);
+static void xfs_dir2_leaf_check(struct xfs_inode *dp, struct xfs_buf *bp);
 #else
 #define	xfs_dir2_leaf_check(dp, bp)
 #endif
-static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **lbpp,
-				    int *indexp, xfs_dabuf_t **dbpp);
-static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp,
+static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp,
+				    int *indexp, struct xfs_buf **dbpp);
+static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp,
 				    int first, int last);
-static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp);
+static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp);
 
 
 /*
@@ -55,7 +55,7 @@ static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp);
 int						/* error */
 xfs_dir2_block_to_leaf(
 	xfs_da_args_t		*args,		/* operation arguments */
-	xfs_dabuf_t		*dbp)		/* input block's buffer */
+	struct xfs_buf		*dbp)		/* input block's buffer */
 {
 	__be16			*bestsp;	/* leaf's bestsp entries */
 	xfs_dablk_t		blkno;		/* leaf block's bno */
@@ -64,7 +64,7 @@ xfs_dir2_block_to_leaf(
 	xfs_dir2_block_tail_t	*btp;		/* block's tail */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			error;		/* error return code */
-	xfs_dabuf_t		*lbp;		/* leaf block's buffer */
+	struct xfs_buf		*lbp;		/* leaf block's buffer */
 	xfs_dir2_db_t		ldb;		/* leaf block's bno */
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf's tail */
@@ -95,8 +95,8 @@ xfs_dir2_block_to_leaf(
 		return error;
 	}
 	ASSERT(lbp != NULL);
-	leaf = lbp->data;
-	hdr = dbp->data;
+	leaf = lbp->b_addr;
+	hdr = dbp->b_addr;
 	xfs_dir2_data_check(dp, dbp);
 	btp = xfs_dir2_block_tail_p(mp, hdr);
 	blp = xfs_dir2_block_leaf_p(btp);
@@ -143,7 +143,6 @@ xfs_dir2_block_to_leaf(
 	xfs_dir2_leaf_check(dp, lbp);
 	xfs_dir2_data_check(dp, dbp);
 	xfs_dir2_leaf_log_bests(tp, lbp, 0, 0);
-	xfs_da_buf_done(lbp);
 	return 0;
 }
 
@@ -282,7 +281,7 @@ xfs_dir2_leaf_addname(
 	__be16			*bestsp;	/* freespace table in leaf */
 	int			compact;	/* need to compact leaves */
 	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
-	xfs_dabuf_t		*dbp;		/* data block buffer */
+	struct xfs_buf		*dbp;		/* data block buffer */
 	xfs_dir2_data_entry_t	*dep;		/* data block entry */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	xfs_dir2_data_unused_t	*dup;		/* data unused entry */
@@ -291,7 +290,7 @@ xfs_dir2_leaf_addname(
 	int			highstale;	/* index of next stale leaf */
 	int			i;		/* temporary, index */
 	int			index;		/* leaf table position */
-	xfs_dabuf_t		*lbp;		/* leaf's buffer */
+	struct xfs_buf		*lbp;		/* leaf's buffer */
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 	int			length;		/* length of new entry */
 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry table pointer */
@@ -328,7 +327,7 @@ xfs_dir2_leaf_addname(
 	 * But if there are dup hash values the index is of the first of those.
 	 */
 	index = xfs_dir2_leaf_search_hash(args, lbp);
-	leaf = lbp->data;
+	leaf = lbp->b_addr;
 	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	bestsp = xfs_dir2_leaf_bests_p(ltp);
 	length = xfs_dir2_data_entsize(args->namelen);
@@ -402,14 +401,13 @@ xfs_dir2_leaf_addname(
 		 */
 		if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
 							args->total == 0) {
-			xfs_da_brelse(tp, lbp);
+			xfs_trans_brelse(tp, lbp);
 			return XFS_ERROR(ENOSPC);
 		}
 		/*
 		 * Convert to node form.
 		 */
 		error = xfs_dir2_leaf_to_node(args, lbp);
-		xfs_da_buf_done(lbp);
 		if (error)
 			return error;
 		/*
@@ -427,7 +425,7 @@ xfs_dir2_leaf_addname(
 	 * a new data block.
 	 */
 	if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
-		xfs_da_brelse(tp, lbp);
+		xfs_trans_brelse(tp, lbp);
 		return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
 	}
 	/*
@@ -435,7 +433,7 @@ xfs_dir2_leaf_addname(
 	 * changed anything.
 	 */
 	if (args->total == 0 && use_block == -1) {
-		xfs_da_brelse(tp, lbp);
+		xfs_trans_brelse(tp, lbp);
 		return XFS_ERROR(ENOSPC);
 	}
 	/*
@@ -466,14 +464,14 @@ xfs_dir2_leaf_addname(
 		 */
 		if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE,
 				&use_block))) {
-			xfs_da_brelse(tp, lbp);
+			xfs_trans_brelse(tp, lbp);
 			return error;
 		}
 		/*
 		 * Initialize the block.
 		 */
 		if ((error = xfs_dir2_data_init(args, use_block, &dbp))) {
-			xfs_da_brelse(tp, lbp);
+			xfs_trans_brelse(tp, lbp);
 			return error;
 		}
 		/*
@@ -493,7 +491,7 @@ xfs_dir2_leaf_addname(
 		 */
 		else
 			xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
-		hdr = dbp->data;
+		hdr = dbp->b_addr;
 		bestsp[use_block] = hdr->bestfree[0].length;
 		grown = 1;
 	}
@@ -505,10 +503,10 @@ xfs_dir2_leaf_addname(
 		if ((error =
 		    xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, use_block),
 			    -1, &dbp, XFS_DATA_FORK))) {
-			xfs_da_brelse(tp, lbp);
+			xfs_trans_brelse(tp, lbp);
 			return error;
 		}
-		hdr = dbp->data;
+		hdr = dbp->b_addr;
 		grown = 0;
 	}
 	xfs_dir2_data_check(dp, dbp);
@@ -570,9 +568,7 @@ xfs_dir2_leaf_addname(
 	xfs_dir2_leaf_log_header(tp, lbp);
 	xfs_dir2_leaf_log_ents(tp, lbp, lfloglow, lfloghigh);
 	xfs_dir2_leaf_check(dp, lbp);
-	xfs_da_buf_done(lbp);
 	xfs_dir2_data_check(dp, dbp);
-	xfs_da_buf_done(dbp);
 	return 0;
 }
 
@@ -583,8 +579,8 @@ xfs_dir2_leaf_addname(
  */
 STATIC void
 xfs_dir2_leaf_check(
-	xfs_inode_t		*dp,		/* incore directory inode */
-	xfs_dabuf_t		*bp)		/* leaf's buffer */
+	struct xfs_inode	*dp,		/* incore directory inode */
+	struct xfs_buf		*bp)		/* leaf's buffer */
 {
 	int			i;		/* leaf index */
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
@@ -592,7 +588,7 @@ xfs_dir2_leaf_check(
 	xfs_mount_t		*mp;		/* filesystem mount point */
 	int			stale;		/* count of stale leaves */
 
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	mp = dp->i_mount;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
 	/*
@@ -628,14 +624,14 @@ xfs_dir2_leaf_check(
 void
 xfs_dir2_leaf_compact(
 	xfs_da_args_t	*args,		/* operation arguments */
-	xfs_dabuf_t	*bp)		/* leaf buffer */
+	struct xfs_buf	*bp)		/* leaf buffer */
 {
 	int		from;		/* source leaf index */
 	xfs_dir2_leaf_t	*leaf;		/* leaf structure */
 	int		loglow;		/* first leaf entry to log */
 	int		to;		/* target leaf index */
 
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	if (!leaf->hdr.stale) {
 		return;
 	}
@@ -677,7 +673,7 @@ xfs_dir2_leaf_compact(
  */
 void
 xfs_dir2_leaf_compact_x1(
-	xfs_dabuf_t	*bp,		/* leaf buffer */
+	struct xfs_buf	*bp,		/* leaf buffer */
 	int		*indexp,	/* insertion index */
 	int		*lowstalep,	/* out: stale entry before us */
 	int		*highstalep,	/* out: stale entry after us */
@@ -693,7 +689,7 @@ xfs_dir2_leaf_compact_x1(
 	int		newindex=0;	/* new insertion index */
 	int		to;		/* destination copy index */
 
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	ASSERT(be16_to_cpu(leaf->hdr.stale) > 1);
 	index = *indexp;
 
@@ -775,7 +771,7 @@ xfs_dir2_leaf_getdents(
 	xfs_off_t		*offset,
 	filldir_t		filldir)
 {
-	xfs_dabuf_t		*bp;		/* data block buffer */
+	struct xfs_buf		*bp;		/* data block buffer */
 	int			byteoff;	/* offset in current block */
 	xfs_dir2_db_t		curdb;		/* db for current block */
 	xfs_dir2_off_t		curoff;		/* current overall offset */
@@ -839,13 +835,13 @@ xfs_dir2_leaf_getdents(
 		 * If we have no buffer, or we're off the end of the
 		 * current buffer, need to get another one.
 		 */
-		if (!bp || ptr >= (char *)bp->data + mp->m_dirblksize) {
+		if (!bp || ptr >= (char *)bp->b_addr + mp->m_dirblksize) {
 			/*
 			 * If we have a buffer, we need to release it and
 			 * take it out of the mapping.
 			 */
 			if (bp) {
-				xfs_da_brelse(NULL, bp);
+				xfs_trans_brelse(NULL, bp);
 				bp = NULL;
 				map_blocks -= mp->m_dirblkfsbs;
 				/*
@@ -1035,7 +1031,7 @@ xfs_dir2_leaf_getdents(
 			else if (curoff > newoff)
 				ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
 				       curdb);
-			hdr = bp->data;
+			hdr = bp->b_addr;
 			xfs_dir2_data_check(dp, bp);
 			/*
 			 * Find our position in the block.
@@ -1119,7 +1115,7 @@ xfs_dir2_leaf_getdents(
 		*offset = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
 	kmem_free(map);
 	if (bp)
-		xfs_da_brelse(NULL, bp);
+		xfs_trans_brelse(NULL, bp);
 	return error;
 }
 
@@ -1130,10 +1126,10 @@ int
 xfs_dir2_leaf_init(
 	xfs_da_args_t		*args,		/* operation arguments */
 	xfs_dir2_db_t		bno,		/* directory block number */
-	xfs_dabuf_t		**bpp,		/* out: leaf buffer */
+	struct xfs_buf		**bpp,		/* out: leaf buffer */
 	int			magic)		/* magic number for block */
 {
-	xfs_dabuf_t		*bp;		/* leaf buffer */
+	struct xfs_buf		*bp;		/* leaf buffer */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			error;		/* error return code */
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
@@ -1156,7 +1152,7 @@ xfs_dir2_leaf_init(
 		return error;
 	}
 	ASSERT(bp != NULL);
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	/*
 	 * Initialize the header.
 	 */
@@ -1186,7 +1182,7 @@ xfs_dir2_leaf_init(
 static void
 xfs_dir2_leaf_log_bests(
 	xfs_trans_t		*tp,		/* transaction pointer */
-	xfs_dabuf_t		*bp,		/* leaf buffer */
+	struct xfs_buf		*bp,		/* leaf buffer */
 	int			first,		/* first entry to log */
 	int			last)		/* last entry to log */
 {
@@ -1195,12 +1191,12 @@ xfs_dir2_leaf_log_bests(
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
 
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
 	ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
 	firstb = xfs_dir2_leaf_bests_p(ltp) + first;
 	lastb = xfs_dir2_leaf_bests_p(ltp) + last;
-	xfs_da_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
+	xfs_trans_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
 		(uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
 }
 
@@ -1210,7 +1206,7 @@ xfs_dir2_leaf_log_bests(
 void
 xfs_dir2_leaf_log_ents(
 	xfs_trans_t		*tp,		/* transaction pointer */
-	xfs_dabuf_t		*bp,		/* leaf buffer */
+	struct xfs_buf		*bp,		/* leaf buffer */
 	int			first,		/* first entry to log */
 	int			last)		/* last entry to log */
 {
@@ -1218,12 +1214,12 @@ xfs_dir2_leaf_log_ents(
 	xfs_dir2_leaf_entry_t	*lastlep;	/* pointer to last entry */
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
 	firstlep = &leaf->ents[first];
 	lastlep = &leaf->ents[last];
-	xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
+	xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
 		(uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));
 }
 
@@ -1232,15 +1228,15 @@ xfs_dir2_leaf_log_ents(
  */
 void
 xfs_dir2_leaf_log_header(
-	xfs_trans_t		*tp,		/* transaction pointer */
-	xfs_dabuf_t		*bp)		/* leaf buffer */
+	struct xfs_trans	*tp,
+	struct xfs_buf		*bp)
 {
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
-	xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
+	xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
 		(uint)(sizeof(leaf->hdr) - 1));
 }
 
@@ -1249,18 +1245,18 @@ xfs_dir2_leaf_log_header(
  */
 STATIC void
 xfs_dir2_leaf_log_tail(
-	xfs_trans_t		*tp,		/* transaction pointer */
-	xfs_dabuf_t		*bp)		/* leaf buffer */
+	struct xfs_trans	*tp,
+	struct xfs_buf		*bp)
 {
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
 	xfs_mount_t		*mp;		/* filesystem mount point */
 
 	mp = tp->t_mountp;
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
 	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
-	xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
+	xfs_trans_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
 		(uint)(mp->m_dirblksize - 1));
 }
 
@@ -1273,12 +1269,12 @@ int
 xfs_dir2_leaf_lookup(
 	xfs_da_args_t		*args)		/* operation arguments */
 {
-	xfs_dabuf_t		*dbp;		/* data block buffer */
+	struct xfs_buf		*dbp;		/* data block buffer */
 	xfs_dir2_data_entry_t	*dep;		/* data block entry */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			error;		/* error return code */
 	int			index;		/* found entry index */
-	xfs_dabuf_t		*lbp;		/* leaf buffer */
+	struct xfs_buf		*lbp;		/* leaf buffer */
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
 	xfs_trans_t		*tp;		/* transaction pointer */
@@ -1294,7 +1290,7 @@ xfs_dir2_leaf_lookup(
 	tp = args->trans;
 	dp = args->dp;
 	xfs_dir2_leaf_check(dp, lbp);
-	leaf = lbp->data;
+	leaf = lbp->b_addr;
 	/*
 	 * Get to the leaf entry and contained data entry address.
 	 */
@@ -1303,15 +1299,15 @@ xfs_dir2_leaf_lookup(
 	 * Point to the data entry.
 	 */
 	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)dbp->data +
+	      ((char *)dbp->b_addr +
 	       xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
 	/*
 	 * Return the found inode number & CI name if appropriate
 	 */
 	args->inumber = be64_to_cpu(dep->inumber);
 	error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
-	xfs_da_brelse(tp, dbp);
-	xfs_da_brelse(tp, lbp);
+	xfs_trans_brelse(tp, dbp);
+	xfs_trans_brelse(tp, lbp);
 	return XFS_ERROR(error);
 }
 
@@ -1324,17 +1320,17 @@ xfs_dir2_leaf_lookup(
 static int					/* error */
 xfs_dir2_leaf_lookup_int(
 	xfs_da_args_t		*args,		/* operation arguments */
-	xfs_dabuf_t		**lbpp,		/* out: leaf buffer */
+	struct xfs_buf		**lbpp,		/* out: leaf buffer */
 	int			*indexp,	/* out: index in leaf block */
-	xfs_dabuf_t		**dbpp)		/* out: data buffer */
+	struct xfs_buf		**dbpp)		/* out: data buffer */
 {
 	xfs_dir2_db_t		curdb = -1;	/* current data block number */
-	xfs_dabuf_t		*dbp = NULL;	/* data buffer */
+	struct xfs_buf		*dbp = NULL;	/* data buffer */
 	xfs_dir2_data_entry_t	*dep;		/* data entry */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			error;		/* error return code */
 	int			index;		/* index in leaf block */
-	xfs_dabuf_t		*lbp;		/* leaf buffer */
+	struct xfs_buf		*lbp;		/* leaf buffer */
 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 	xfs_mount_t		*mp;		/* filesystem mount point */
@@ -1354,7 +1350,7 @@ xfs_dir2_leaf_lookup_int(
 	if (error)
 		return error;
 	*lbpp = lbp;
-	leaf = lbp->data;
+	leaf = lbp->b_addr;
 	xfs_dir2_leaf_check(dp, lbp);
 	/*
 	 * Look for the first leaf entry with our hash value.
@@ -1382,12 +1378,12 @@ xfs_dir2_leaf_lookup_int(
 		 */
 		if (newdb != curdb) {
 			if (dbp)
-				xfs_da_brelse(tp, dbp);
+				xfs_trans_brelse(tp, dbp);
 			error = xfs_da_read_buf(tp, dp,
 						xfs_dir2_db_to_da(mp, newdb),
 						-1, &dbp, XFS_DATA_FORK);
 			if (error) {
-				xfs_da_brelse(tp, lbp);
+				xfs_trans_brelse(tp, lbp);
 				return error;
 			}
 			xfs_dir2_data_check(dp, dbp);
@@ -1396,7 +1392,7 @@ xfs_dir2_leaf_lookup_int(
 		/*
 		 * Point to the data entry.
 		 */
-		dep = (xfs_dir2_data_entry_t *)((char *)dbp->data +
+		dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr +
 			xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
 		/*
 		 * Compare name and if it's an exact match, return the index
@@ -1424,12 +1420,12 @@ xfs_dir2_leaf_lookup_int(
 	if (args->cmpresult == XFS_CMP_CASE) {
 		ASSERT(cidb != -1);
 		if (cidb != curdb) {
-			xfs_da_brelse(tp, dbp);
+			xfs_trans_brelse(tp, dbp);
 			error = xfs_da_read_buf(tp, dp,
 						xfs_dir2_db_to_da(mp, cidb),
 						-1, &dbp, XFS_DATA_FORK);
 			if (error) {
-				xfs_da_brelse(tp, lbp);
+				xfs_trans_brelse(tp, lbp);
 				return error;
 			}
 		}
@@ -1441,8 +1437,8 @@ xfs_dir2_leaf_lookup_int(
 	 */
 	ASSERT(cidb == -1);
 	if (dbp)
-		xfs_da_brelse(tp, dbp);
-	xfs_da_brelse(tp, lbp);
+		xfs_trans_brelse(tp, dbp);
+	xfs_trans_brelse(tp, lbp);
 	return XFS_ERROR(ENOENT);
 }
 
@@ -1456,13 +1452,13 @@ xfs_dir2_leaf_removename(
 	__be16			*bestsp;	/* leaf block best freespace */
 	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
 	xfs_dir2_db_t		db;		/* data block number */
-	xfs_dabuf_t		*dbp;		/* data block buffer */
+	struct xfs_buf		*dbp;		/* data block buffer */
 	xfs_dir2_data_entry_t	*dep;		/* data entry structure */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			error;		/* error return code */
 	xfs_dir2_db_t		i;		/* temporary data block # */
 	int			index;		/* index into leaf entries */
-	xfs_dabuf_t		*lbp;		/* leaf buffer */
+	struct xfs_buf		*lbp;		/* leaf buffer */
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
@@ -1483,8 +1479,8 @@ xfs_dir2_leaf_removename(
 	dp = args->dp;
 	tp = args->trans;
 	mp = dp->i_mount;
-	leaf = lbp->data;
-	hdr = dbp->data;
+	leaf = lbp->b_addr;
+	hdr = dbp->b_addr;
 	xfs_dir2_data_check(dp, dbp);
 	/*
 	 * Point to the leaf entry, use that to point to the data entry.
@@ -1541,12 +1537,9 @@ xfs_dir2_leaf_removename(
 			 * Just go on, returning success, leaving the
 			 * empty block in place.
 			 */
-			if (error == ENOSPC && args->total == 0) {
-				xfs_da_buf_done(dbp);
+			if (error == ENOSPC && args->total == 0)
 				error = 0;
-			}
 			xfs_dir2_leaf_check(dp, lbp);
-			xfs_da_buf_done(lbp);
 			return error;
 		}
 		dbp = NULL;
@@ -1577,10 +1570,9 @@ xfs_dir2_leaf_removename(
 	/*
 	 * If the data block was not the first one, drop it.
 	 */
-	else if (db != mp->m_dirdatablk && dbp != NULL) {
-		xfs_da_buf_done(dbp);
+	else if (db != mp->m_dirdatablk)
 		dbp = NULL;
-	}
+
 	xfs_dir2_leaf_check(dp, lbp);
 	/*
 	 * See if we can convert to block form.
@@ -1595,12 +1587,12 @@ int						/* error */
 xfs_dir2_leaf_replace(
 	xfs_da_args_t		*args)		/* operation arguments */
 {
-	xfs_dabuf_t		*dbp;		/* data block buffer */
+	struct xfs_buf		*dbp;		/* data block buffer */
 	xfs_dir2_data_entry_t	*dep;		/* data block entry */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			error;		/* error return code */
 	int			index;		/* index of leaf entry */
-	xfs_dabuf_t		*lbp;		/* leaf buffer */
+	struct xfs_buf		*lbp;		/* leaf buffer */
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
 	xfs_trans_t		*tp;		/* transaction pointer */
@@ -1614,7 +1606,7 @@ xfs_dir2_leaf_replace(
 		return error;
 	}
 	dp = args->dp;
-	leaf = lbp->data;
+	leaf = lbp->b_addr;
 	/*
 	 * Point to the leaf entry, get data address from it.
 	 */
@@ -1623,7 +1615,7 @@ xfs_dir2_leaf_replace(
 	 * Point to the data entry.
 	 */
 	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)dbp->data +
+	      ((char *)dbp->b_addr +
 	       xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
 	ASSERT(args->inumber != be64_to_cpu(dep->inumber));
 	/*
@@ -1632,9 +1624,8 @@ xfs_dir2_leaf_replace(
 	dep->inumber = cpu_to_be64(args->inumber);
 	tp = args->trans;
 	xfs_dir2_data_log_entry(tp, dbp, dep);
-	xfs_da_buf_done(dbp);
 	xfs_dir2_leaf_check(dp, lbp);
-	xfs_da_brelse(tp, lbp);
+	xfs_trans_brelse(tp, lbp);
 	return 0;
 }
 
@@ -1646,7 +1637,7 @@ xfs_dir2_leaf_replace(
 int						/* index value */
 xfs_dir2_leaf_search_hash(
 	xfs_da_args_t		*args,		/* operation arguments */
-	xfs_dabuf_t		*lbp)		/* leaf buffer */
+	struct xfs_buf		*lbp)		/* leaf buffer */
 {
 	xfs_dahash_t		hash=0;		/* hash from this entry */
 	xfs_dahash_t		hashwant;	/* hash value looking for */
@@ -1656,7 +1647,7 @@ xfs_dir2_leaf_search_hash(
 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
 	int			mid=0;		/* current leaf index */
 
-	leaf = lbp->data;
+	leaf = lbp->b_addr;
 #ifndef __KERNEL__
 	if (!leaf->hdr.count)
 		return 0;
@@ -1699,11 +1690,11 @@ xfs_dir2_leaf_search_hash(
 int						/* error */
 xfs_dir2_leaf_trim_data(
 	xfs_da_args_t		*args,		/* operation arguments */
-	xfs_dabuf_t		*lbp,		/* leaf buffer */
+	struct xfs_buf		*lbp,		/* leaf buffer */
 	xfs_dir2_db_t		db)		/* data block number */
 {
 	__be16			*bestsp;	/* leaf bests table */
-	xfs_dabuf_t		*dbp;		/* data block buffer */
+	struct xfs_buf		*dbp;		/* data block buffer */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			error;		/* error return value */
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
@@ -1722,12 +1713,12 @@ xfs_dir2_leaf_trim_data(
 		return error;
 	}
 
-	leaf = lbp->data;
+	leaf = lbp->b_addr;
 	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 
 #ifdef DEBUG
 {
-	struct xfs_dir2_data_hdr *hdr = dbp->data;
+	struct xfs_dir2_data_hdr *hdr = dbp->b_addr;
 
 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
 	ASSERT(be16_to_cpu(hdr->bestfree[0].length) ==
@@ -1741,7 +1732,7 @@ xfs_dir2_leaf_trim_data(
 	 */
 	if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
 		ASSERT(error != ENOSPC);
-		xfs_da_brelse(tp, dbp);
+		xfs_trans_brelse(tp, dbp);
 		return error;
 	}
 	/*
@@ -1781,10 +1772,10 @@ xfs_dir2_node_to_leaf(
 	xfs_da_args_t		*args;		/* operation arguments */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			error;		/* error return code */
-	xfs_dabuf_t		*fbp;		/* buffer for freespace block */
+	struct xfs_buf		*fbp;		/* buffer for freespace block */
 	xfs_fileoff_t		fo;		/* freespace file offset */
 	xfs_dir2_free_t		*free;		/* freespace structure */
-	xfs_dabuf_t		*lbp;		/* buffer for leaf block */
+	struct xfs_buf		*lbp;		/* buffer for leaf block */
 	xfs_dir2_leaf_tail_t	*ltp;		/* tail of leaf structure */
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 	xfs_mount_t		*mp;		/* filesystem mount point */
@@ -1838,7 +1829,7 @@ xfs_dir2_node_to_leaf(
 	if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize)
 		return 0;
 	lbp = state->path.blk[0].bp;
-	leaf = lbp->data;
+	leaf = lbp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
 	/*
 	 * Read the freespace block.
@@ -1847,7 +1838,7 @@ xfs_dir2_node_to_leaf(
 			XFS_DATA_FORK))) {
 		return error;
 	}
-	free = fbp->data;
+	free = fbp->b_addr;
 	ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
 	ASSERT(!free->hdr.firstdb);
 
@@ -1857,7 +1848,7 @@ xfs_dir2_node_to_leaf(
 	 */
 	if (xfs_dir2_leaf_size(&leaf->hdr, be32_to_cpu(free->hdr.nvalid)) >
 			mp->m_dirblksize) {
-		xfs_da_brelse(tp, fbp);
+		xfs_trans_brelse(tp, fbp);
 		return 0;
 	}
 
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index b0f2678..6c70524 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -36,20 +36,20 @@
 /*
  * Function declarations.
  */
-static void xfs_dir2_free_log_header(xfs_trans_t *tp, xfs_dabuf_t *bp);
-static int xfs_dir2_leafn_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index);
+static int xfs_dir2_leafn_add(struct xfs_buf *bp, xfs_da_args_t *args,
+			      int index);
 #ifdef DEBUG
-static void xfs_dir2_leafn_check(xfs_inode_t *dp, xfs_dabuf_t *bp);
+static void xfs_dir2_leafn_check(struct xfs_inode *dp, struct xfs_buf *bp);
 #else
 #define	xfs_dir2_leafn_check(dp, bp)
 #endif
-static void xfs_dir2_leafn_moveents(xfs_da_args_t *args, xfs_dabuf_t *bp_s,
-				    int start_s, xfs_dabuf_t *bp_d, int start_d,
-				    int count);
+static void xfs_dir2_leafn_moveents(xfs_da_args_t *args, struct xfs_buf *bp_s,
+				    int start_s, struct xfs_buf *bp_d,
+				    int start_d, int count);
 static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state,
 				     xfs_da_state_blk_t *blk1,
 				     xfs_da_state_blk_t *blk2);
-static int xfs_dir2_leafn_remove(xfs_da_args_t *args, xfs_dabuf_t *bp,
+static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp,
 				 int index, xfs_da_state_blk_t *dblk,
 				 int *rval);
 static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
@@ -60,16 +60,16 @@ static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
  */
 STATIC void
 xfs_dir2_free_log_bests(
-	xfs_trans_t		*tp,		/* transaction pointer */
-	xfs_dabuf_t		*bp,		/* freespace buffer */
+	struct xfs_trans	*tp,
+	struct xfs_buf		*bp,
 	int			first,		/* first entry to log */
 	int			last)		/* last entry to log */
 {
 	xfs_dir2_free_t		*free;		/* freespace structure */
 
-	free = bp->data;
+	free = bp->b_addr;
 	ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
-	xfs_da_log_buf(tp, bp,
+	xfs_trans_log_buf(tp, bp,
 		(uint)((char *)&free->bests[first] - (char *)free),
 		(uint)((char *)&free->bests[last] - (char *)free +
 		       sizeof(free->bests[0]) - 1));
@@ -80,14 +80,14 @@ xfs_dir2_free_log_bests(
  */
 static void
 xfs_dir2_free_log_header(
-	xfs_trans_t		*tp,		/* transaction pointer */
-	xfs_dabuf_t		*bp)		/* freespace buffer */
+	struct xfs_trans	*tp,
+	struct xfs_buf		*bp)
 {
 	xfs_dir2_free_t		*free;		/* freespace structure */
 
-	free = bp->data;
+	free = bp->b_addr;
 	ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
-	xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free),
+	xfs_trans_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free),
 		(uint)(sizeof(xfs_dir2_free_hdr_t) - 1));
 }
 
@@ -99,11 +99,11 @@ xfs_dir2_free_log_header(
 int						/* error */
 xfs_dir2_leaf_to_node(
 	xfs_da_args_t		*args,		/* operation arguments */
-	xfs_dabuf_t		*lbp)		/* leaf buffer */
+	struct xfs_buf		*lbp)		/* leaf buffer */
 {
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			error;		/* error return value */
-	xfs_dabuf_t		*fbp;		/* freespace buffer */
+	struct xfs_buf		*fbp;		/* freespace buffer */
 	xfs_dir2_db_t		fdb;		/* freespace block number */
 	xfs_dir2_free_t		*free;		/* freespace structure */
 	__be16			*from;		/* pointer to freespace entry */
@@ -136,8 +136,8 @@ xfs_dir2_leaf_to_node(
 		return error;
 	}
 	ASSERT(fbp != NULL);
-	free = fbp->data;
-	leaf = lbp->data;
+	free = fbp->b_addr;
+	leaf = lbp->b_addr;
 	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	/*
 	 * Initialize the freespace block header.
@@ -164,7 +164,6 @@ xfs_dir2_leaf_to_node(
 	xfs_dir2_leaf_log_header(tp, lbp);
 	xfs_dir2_free_log_header(tp, fbp);
 	xfs_dir2_free_log_bests(tp, fbp, 0, be32_to_cpu(free->hdr.nvalid) - 1);
-	xfs_da_buf_done(fbp);
 	xfs_dir2_leafn_check(dp, lbp);
 	return 0;
 }
@@ -175,7 +174,7 @@ xfs_dir2_leaf_to_node(
  */
 static int					/* error */
 xfs_dir2_leafn_add(
-	xfs_dabuf_t		*bp,		/* leaf buffer */
+	struct xfs_buf		*bp,		/* leaf buffer */
 	xfs_da_args_t		*args,		/* operation arguments */
 	int			index)		/* insertion pt for new entry */
 {
@@ -195,7 +194,7 @@ xfs_dir2_leafn_add(
 	dp = args->dp;
 	mp = dp->i_mount;
 	tp = args->trans;
-	leaf = bp->data;
+	leaf = bp->b_addr;
 
 	/*
 	 * Quick check just to make sure we are not going to index
@@ -261,15 +260,15 @@ xfs_dir2_leafn_add(
  */
 void
 xfs_dir2_leafn_check(
-	xfs_inode_t	*dp,			/* incore directory inode */
-	xfs_dabuf_t	*bp)			/* leaf buffer */
+	struct xfs_inode *dp,
+	struct xfs_buf	*bp)
 {
 	int		i;			/* leaf index */
 	xfs_dir2_leaf_t	*leaf;			/* leaf structure */
 	xfs_mount_t	*mp;			/* filesystem mount point */
 	int		stale;			/* count of stale leaves */
 
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	mp = dp->i_mount;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
 	ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
@@ -291,12 +290,12 @@ xfs_dir2_leafn_check(
  */
 xfs_dahash_t					/* hash value */
 xfs_dir2_leafn_lasthash(
-	xfs_dabuf_t	*bp,			/* leaf buffer */
+	struct xfs_buf	*bp,			/* leaf buffer */
 	int		*count)			/* count of entries in leaf */
 {
 	xfs_dir2_leaf_t	*leaf;			/* leaf structure */
 
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
 	if (count)
 		*count = be16_to_cpu(leaf->hdr.count);
@@ -311,12 +310,12 @@ xfs_dir2_leafn_lasthash(
  */
 STATIC int
 xfs_dir2_leafn_lookup_for_addname(
-	xfs_dabuf_t		*bp,		/* leaf buffer */
+	struct xfs_buf		*bp,		/* leaf buffer */
 	xfs_da_args_t		*args,		/* operation arguments */
 	int			*indexp,	/* out: leaf entry index */
 	xfs_da_state_t		*state)		/* state to fill in */
 {
-	xfs_dabuf_t		*curbp = NULL;	/* current data/free buffer */
+	struct xfs_buf		*curbp = NULL;	/* current data/free buffer */
 	xfs_dir2_db_t		curdb = -1;	/* current data block number */
 	xfs_dir2_db_t		curfdb = -1;	/* current free block number */
 	xfs_inode_t		*dp;		/* incore directory inode */
@@ -335,7 +334,7 @@ xfs_dir2_leafn_lookup_for_addname(
 	dp = args->dp;
 	tp = args->trans;
 	mp = dp->i_mount;
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
 #ifdef __KERNEL__
 	ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
@@ -352,7 +351,7 @@ xfs_dir2_leafn_lookup_for_addname(
 		/* If so, it's a free block buffer, get the block number. */
 		curbp = state->extrablk.bp;
 		curfdb = state->extrablk.blkno;
-		free = curbp->data;
+		free = curbp->b_addr;
 		ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
 	}
 	length = xfs_dir2_data_entsize(args->namelen);
@@ -394,7 +393,7 @@ xfs_dir2_leafn_lookup_for_addname(
 				 * If we had one before, drop it.
 				 */
 				if (curbp)
-					xfs_da_brelse(tp, curbp);
+					xfs_trans_brelse(tp, curbp);
 				/*
 				 * Read the free block.
 				 */
@@ -403,7 +402,7 @@ xfs_dir2_leafn_lookup_for_addname(
 						-1, &curbp, XFS_DATA_FORK);
 				if (error)
 					return error;
-				free = curbp->data;
+				free = curbp->b_addr;
 				ASSERT(be32_to_cpu(free->hdr.magic) ==
 					XFS_DIR2_FREE_MAGIC);
 				ASSERT((be32_to_cpu(free->hdr.firstdb) %
@@ -424,7 +423,7 @@ xfs_dir2_leafn_lookup_for_addname(
 				XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
 							XFS_ERRLEVEL_LOW, mp);
 				if (curfdb != newfdb)
-					xfs_da_brelse(tp, curbp);
+					xfs_trans_brelse(tp, curbp);
 				return XFS_ERROR(EFSCORRUPTED);
 			}
 			curfdb = newfdb;
@@ -459,12 +458,12 @@ out:
  */
 STATIC int
 xfs_dir2_leafn_lookup_for_entry(
-	xfs_dabuf_t		*bp,		/* leaf buffer */
+	struct xfs_buf		*bp,		/* leaf buffer */
 	xfs_da_args_t		*args,		/* operation arguments */
 	int			*indexp,	/* out: leaf entry index */
 	xfs_da_state_t		*state)		/* state to fill in */
 {
-	xfs_dabuf_t		*curbp = NULL;	/* current data/free buffer */
+	struct xfs_buf		*curbp = NULL;	/* current data/free buffer */
 	xfs_dir2_db_t		curdb = -1;	/* current data block number */
 	xfs_dir2_data_entry_t	*dep;		/* data block entry */
 	xfs_inode_t		*dp;		/* incore directory inode */
@@ -480,7 +479,7 @@ xfs_dir2_leafn_lookup_for_entry(
 	dp = args->dp;
 	tp = args->trans;
 	mp = dp->i_mount;
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
 #ifdef __KERNEL__
 	ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
@@ -525,7 +524,7 @@ xfs_dir2_leafn_lookup_for_entry(
 			 */
 			if (curbp && (args->cmpresult == XFS_CMP_DIFFERENT ||
 						curdb != state->extrablk.blkno))
-				xfs_da_brelse(tp, curbp);
+				xfs_trans_brelse(tp, curbp);
 			/*
 			 * If needing the block that is saved with a CI match,
 			 * use it otherwise read in the new data block.
@@ -547,7 +546,7 @@ xfs_dir2_leafn_lookup_for_entry(
 		/*
 		 * Point to the data entry.
 		 */
-		dep = (xfs_dir2_data_entry_t *)((char *)curbp->data +
+		dep = (xfs_dir2_data_entry_t *)((char *)curbp->b_addr +
 			xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
 		/*
 		 * Compare the entry and if it's an exact match, return
@@ -559,7 +558,7 @@ xfs_dir2_leafn_lookup_for_entry(
 			/* If there is a CI match block, drop it */
 			if (args->cmpresult != XFS_CMP_DIFFERENT &&
 						curdb != state->extrablk.blkno)
-				xfs_da_brelse(tp, state->extrablk.bp);
+				xfs_trans_brelse(tp, state->extrablk.bp);
 			args->cmpresult = cmp;
 			args->inumber = be64_to_cpu(dep->inumber);
 			*indexp = index;
@@ -567,7 +566,7 @@ xfs_dir2_leafn_lookup_for_entry(
 			state->extrablk.bp = curbp;
 			state->extrablk.blkno = curdb;
 			state->extrablk.index = (int)((char *)dep -
-							(char *)curbp->data);
+							(char *)curbp->b_addr);
 			state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
 			if (cmp == XFS_CMP_EXACT)
 				return XFS_ERROR(EEXIST);
@@ -586,7 +585,7 @@ xfs_dir2_leafn_lookup_for_entry(
 		} else {
 			/* If the curbp is not the CI match block, drop it */
 			if (state->extrablk.bp != curbp)
-				xfs_da_brelse(tp, curbp);
+				xfs_trans_brelse(tp, curbp);
 		}
 	} else {
 		state->extravalid = 0;
@@ -602,7 +601,7 @@ xfs_dir2_leafn_lookup_for_entry(
  */
 int
 xfs_dir2_leafn_lookup_int(
-	xfs_dabuf_t		*bp,		/* leaf buffer */
+	struct xfs_buf		*bp,		/* leaf buffer */
 	xfs_da_args_t		*args,		/* operation arguments */
 	int			*indexp,	/* out: leaf entry index */
 	xfs_da_state_t		*state)		/* state to fill in */
@@ -620,9 +619,9 @@ xfs_dir2_leafn_lookup_int(
 static void
 xfs_dir2_leafn_moveents(
 	xfs_da_args_t	*args,			/* operation arguments */
-	xfs_dabuf_t	*bp_s,			/* source leaf buffer */
+	struct xfs_buf	*bp_s,			/* source leaf buffer */
 	int		start_s,		/* source leaf index */
-	xfs_dabuf_t	*bp_d,			/* destination leaf buffer */
+	struct xfs_buf	*bp_d,			/* destination leaf buffer */
 	int		start_d,		/* destination leaf index */
 	int		count)			/* count of leaves to copy */
 {
@@ -640,8 +639,8 @@ xfs_dir2_leafn_moveents(
 		return;
 	}
 	tp = args->trans;
-	leaf_s = bp_s->data;
-	leaf_d = bp_d->data;
+	leaf_s = bp_s->b_addr;
+	leaf_d = bp_d->b_addr;
 	/*
 	 * If the destination index is not the end of the current
 	 * destination leaf entries, open up a hole in the destination
@@ -702,14 +701,14 @@ xfs_dir2_leafn_moveents(
  */
 int						/* sort order */
 xfs_dir2_leafn_order(
-	xfs_dabuf_t	*leaf1_bp,		/* leaf1 buffer */
-	xfs_dabuf_t	*leaf2_bp)		/* leaf2 buffer */
+	struct xfs_buf	*leaf1_bp,		/* leaf1 buffer */
+	struct xfs_buf	*leaf2_bp)		/* leaf2 buffer */
 {
 	xfs_dir2_leaf_t	*leaf1;			/* leaf1 structure */
 	xfs_dir2_leaf_t	*leaf2;			/* leaf2 structure */
 
-	leaf1 = leaf1_bp->data;
-	leaf2 = leaf2_bp->data;
+	leaf1 = leaf1_bp->b_addr;
+	leaf2 = leaf2_bp->b_addr;
 	ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
 	ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
 	if (be16_to_cpu(leaf1->hdr.count) > 0 &&
@@ -757,8 +756,8 @@ xfs_dir2_leafn_rebalance(
 		blk1 = blk2;
 		blk2 = tmp;
 	}
-	leaf1 = blk1->bp->data;
-	leaf2 = blk2->bp->data;
+	leaf1 = blk1->bp->b_addr;
+	leaf2 = blk2->bp->b_addr;
 	oldsum = be16_to_cpu(leaf1->hdr.count) + be16_to_cpu(leaf2->hdr.count);
 #ifdef DEBUG
 	oldstale = be16_to_cpu(leaf1->hdr.stale) + be16_to_cpu(leaf2->hdr.stale);
@@ -834,14 +833,14 @@ xfs_dir2_leafn_rebalance(
 static int					/* error */
 xfs_dir2_leafn_remove(
 	xfs_da_args_t		*args,		/* operation arguments */
-	xfs_dabuf_t		*bp,		/* leaf buffer */
+	struct xfs_buf		*bp,		/* leaf buffer */
 	int			index,		/* leaf entry index */
 	xfs_da_state_blk_t	*dblk,		/* data block */
 	int			*rval)		/* resulting block needs join */
 {
 	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
 	xfs_dir2_db_t		db;		/* data block number */
-	xfs_dabuf_t		*dbp;		/* data block buffer */
+	struct xfs_buf		*dbp;		/* data block buffer */
 	xfs_dir2_data_entry_t	*dep;		/* data block entry */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
@@ -858,7 +857,7 @@ xfs_dir2_leafn_remove(
 	dp = args->dp;
 	tp = args->trans;
 	mp = dp->i_mount;
-	leaf = bp->data;
+	leaf = bp->b_addr;
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
 	/*
 	 * Point to the entry we're removing.
@@ -884,7 +883,7 @@ xfs_dir2_leafn_remove(
 	 * in the data block in case it changes.
 	 */
 	dbp = dblk->bp;
-	hdr = dbp->data;
+	hdr = dbp->b_addr;
 	dep = (xfs_dir2_data_entry_t *)((char *)hdr + off);
 	longest = be16_to_cpu(hdr->bestfree[0].length);
 	needlog = needscan = 0;
@@ -905,7 +904,7 @@ xfs_dir2_leafn_remove(
 	 */
 	if (longest < be16_to_cpu(hdr->bestfree[0].length)) {
 		int		error;		/* error return value */
-		xfs_dabuf_t	*fbp;		/* freeblock buffer */
+		struct xfs_buf	*fbp;		/* freeblock buffer */
 		xfs_dir2_db_t	fdb;		/* freeblock block number */
 		int		findex;		/* index in freeblock entries */
 		xfs_dir2_free_t	*free;		/* freeblock structure */
@@ -920,7 +919,7 @@ xfs_dir2_leafn_remove(
 				-1, &fbp, XFS_DATA_FORK))) {
 			return error;
 		}
-		free = fbp->data;
+		free = fbp->b_addr;
 		ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
 		ASSERT(be32_to_cpu(free->hdr.firstdb) ==
 		       xfs_dir2_free_max_bests(mp) *
@@ -948,9 +947,7 @@ xfs_dir2_leafn_remove(
 			 * In this case just drop the buffer and some one else
 			 * will eventually get rid of the empty block.
 			 */
-			else if (error == ENOSPC && args->total == 0)
-				xfs_da_buf_done(dbp);
-			else
+			else if (!(error == ENOSPC && args->total == 0))
 				return error;
 		}
 		/*
@@ -1018,11 +1015,6 @@ xfs_dir2_leafn_remove(
 		 */
 		if (logfree)
 			xfs_dir2_free_log_bests(tp, fbp, findex, findex);
-		/*
-		 * Drop the buffer if we still have it.
-		 */
-		if (fbp)
-			xfs_da_buf_done(fbp);
 	}
 	xfs_dir2_leafn_check(dp, bp);
 	/*
@@ -1114,7 +1106,7 @@ xfs_dir2_leafn_toosmall(
 {
 	xfs_da_state_blk_t	*blk;		/* leaf block */
 	xfs_dablk_t		blkno;		/* leaf block number */
-	xfs_dabuf_t		*bp;		/* leaf buffer */
+	struct xfs_buf		*bp;		/* leaf buffer */
 	int			bytes;		/* bytes in use */
 	int			count;		/* leaf live entry count */
 	int			error;		/* error return value */
@@ -1130,7 +1122,7 @@ xfs_dir2_leafn_toosmall(
 	 * to coalesce with a sibling.
 	 */
 	blk = &state->path.blk[state->path.active - 1];
-	info = blk->bp->data;
+	info = blk->bp->b_addr;
 	ASSERT(info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
 	leaf = (xfs_dir2_leaf_t *)info;
 	count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
@@ -1189,7 +1181,7 @@ xfs_dir2_leafn_toosmall(
 		leaf = (xfs_dir2_leaf_t *)info;
 		count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
 		bytes = state->blocksize - (state->blocksize >> 2);
-		leaf = bp->data;
+		leaf = bp->b_addr;
 		ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
 		count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
 		bytes -= count * (uint)sizeof(leaf->ents[0]);
@@ -1198,7 +1190,7 @@ xfs_dir2_leafn_toosmall(
 		 */
 		if (bytes >= 0)
 			break;
-		xfs_da_brelse(state->args->trans, bp);
+		xfs_trans_brelse(state->args->trans, bp);
 	}
 	/*
 	 * Didn't like either block, give up.
@@ -1207,11 +1199,7 @@ xfs_dir2_leafn_toosmall(
 		*action = 0;
 		return 0;
 	}
-	/*
-	 * Done with the sibling leaf block here, drop the dabuf
-	 * so path_shift can get it.
-	 */
-	xfs_da_buf_done(bp);
+
 	/*
 	 * Make altpath point to the block we want to keep (the lower
 	 * numbered block) and path point to the block we want to drop.
@@ -1247,8 +1235,8 @@ xfs_dir2_leafn_unbalance(
 	args = state->args;
 	ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
 	ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC);
-	drop_leaf = drop_blk->bp->data;
-	save_leaf = save_blk->bp->data;
+	drop_leaf = drop_blk->bp->b_addr;
+	save_leaf = save_blk->bp->b_addr;
 	ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
 	ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
 	/*
@@ -1356,13 +1344,13 @@ xfs_dir2_node_addname_int(
 {
 	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
 	xfs_dir2_db_t		dbno;		/* data block number */
-	xfs_dabuf_t		*dbp;		/* data block buffer */
+	struct xfs_buf		*dbp;		/* data block buffer */
 	xfs_dir2_data_entry_t	*dep;		/* data entry pointer */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	xfs_dir2_data_unused_t	*dup;		/* data unused entry pointer */
 	int			error;		/* error return value */
 	xfs_dir2_db_t		fbno;		/* freespace block number */
-	xfs_dabuf_t		*fbp;		/* freespace buffer */
+	struct xfs_buf		*fbp;		/* freespace buffer */
 	int			findex;		/* freespace entry index */
 	xfs_dir2_free_t		*free=NULL;	/* freespace block structure */
 	xfs_dir2_db_t		ifbno;		/* initial freespace block no */
@@ -1390,7 +1378,7 @@ xfs_dir2_node_addname_int(
 		 * Remember initial freespace block number.
 		 */
 		ifbno = fblk->blkno;
-		free = fbp->data;
+		free = fbp->b_addr;
 		ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
 		findex = fblk->index;
 		/*
@@ -1474,7 +1462,7 @@ xfs_dir2_node_addname_int(
 			if (unlikely(fbp == NULL)) {
 				continue;
 			}
-			free = fbp->data;
+			free = fbp->b_addr;
 			ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
 			findex = 0;
 		}
@@ -1492,7 +1480,7 @@ xfs_dir2_node_addname_int(
 				/*
 				 * Drop the block.
 				 */
-				xfs_da_brelse(tp, fbp);
+				xfs_trans_brelse(tp, fbp);
 				fbp = NULL;
 				if (fblk && fblk->bp)
 					fblk->bp = NULL;
@@ -1507,36 +1495,23 @@ xfs_dir2_node_addname_int(
 		/*
 		 * Not allowed to allocate, return failure.
 		 */
-		if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
-							args->total == 0) {
-			/*
-			 * Drop the freespace buffer unless it came from our
-			 * caller.
-			 */
-			if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
-				xfs_da_buf_done(fbp);
+		if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
 			return XFS_ERROR(ENOSPC);
-		}
+
 		/*
 		 * Allocate and initialize the new data block.
 		 */
 		if (unlikely((error = xfs_dir2_grow_inode(args,
 							 XFS_DIR2_DATA_SPACE,
 							 &dbno)) ||
-		    (error = xfs_dir2_data_init(args, dbno, &dbp)))) {
-			/*
-			 * Drop the freespace buffer unless it came from our
-			 * caller.
-			 */
-			if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
-				xfs_da_buf_done(fbp);
+		    (error = xfs_dir2_data_init(args, dbno, &dbp))))
 			return error;
-		}
+
 		/*
 		 * If (somehow) we have a freespace block, get rid of it.
 		 */
 		if (fbp)
-			xfs_da_brelse(tp, fbp);
+			xfs_trans_brelse(tp, fbp);
 		if (fblk && fblk->bp)
 			fblk->bp = NULL;
 
@@ -1547,10 +1522,9 @@ xfs_dir2_node_addname_int(
 		fbno = xfs_dir2_db_to_fdb(mp, dbno);
 		if (unlikely(error = xfs_da_read_buf(tp, dp,
 				xfs_dir2_db_to_da(mp, fbno), -2, &fbp,
-				XFS_DATA_FORK))) {
-			xfs_da_buf_done(dbp);
+				XFS_DATA_FORK)))
 			return error;
-  		}
+
 		/*
 		 * If there wasn't a freespace block, the read will
 		 * return a NULL fbp.  Allocate and initialize a new one.
@@ -1598,7 +1572,7 @@ xfs_dir2_node_addname_int(
 			 * Initialize the new block to be empty, and remember
 			 * its first slot as our empty slot.
 			 */
-			free = fbp->data;
+			free = fbp->b_addr;
 			free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC);
 			free->hdr.firstdb = cpu_to_be32(
 				(fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
@@ -1606,7 +1580,7 @@ xfs_dir2_node_addname_int(
 			free->hdr.nvalid = 0;
 			free->hdr.nused = 0;
 		} else {
-			free = fbp->data;
+			free = fbp->b_addr;
 			ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
 		}
 
@@ -1639,7 +1613,7 @@ xfs_dir2_node_addname_int(
 		 * We haven't allocated the data entry yet so this will
 		 * change again.
 		 */
-		hdr = dbp->data;
+		hdr = dbp->b_addr;
 		free->bests[findex] = hdr->bestfree[0].length;
 		logfree = 1;
 	}
@@ -1650,22 +1624,17 @@ xfs_dir2_node_addname_int(
 		/*
 		 * If just checking, we succeeded.
 		 */
-		if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
-			if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
-				xfs_da_buf_done(fbp);
+		if (args->op_flags & XFS_DA_OP_JUSTCHECK)
 			return 0;
-		}
+
 		/*
 		 * Read the data block in.
 		 */
-		if (unlikely(
-		    error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, dbno),
-				-1, &dbp, XFS_DATA_FORK))) {
-			if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
-				xfs_da_buf_done(fbp);
+		error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, dbno),
+				-1, &dbp, XFS_DATA_FORK);
+		if (error)
 			return error;
-		}
-		hdr = dbp->data;
+		hdr = dbp->b_addr;
 		logfree = 0;
 	}
 	ASSERT(be16_to_cpu(hdr->bestfree[0].length) >= length);
@@ -1714,16 +1683,10 @@ xfs_dir2_node_addname_int(
 	if (logfree)
 		xfs_dir2_free_log_bests(tp, fbp, findex, findex);
 	/*
-	 * If the caller didn't hand us the freespace block, drop it.
-	 */
-	if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
-		xfs_da_buf_done(fbp);
-	/*
 	 * Return the data block and offset in args, then drop the data block.
 	 */
 	args->blkno = (xfs_dablk_t)dbno;
 	args->index = be16_to_cpu(*tagp);
-	xfs_da_buf_done(dbp);
 	return 0;
 }
 
@@ -1761,22 +1724,23 @@ xfs_dir2_node_lookup(
 		/* If a CI match, dup the actual name and return EEXIST */
 		xfs_dir2_data_entry_t	*dep;
 
-		dep = (xfs_dir2_data_entry_t *)((char *)state->extrablk.bp->
-						data + state->extrablk.index);
+		dep = (xfs_dir2_data_entry_t *)
+			((char *)state->extrablk.bp->b_addr +
+						 state->extrablk.index);
 		rval = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
 	}
 	/*
 	 * Release the btree blocks and leaf block.
 	 */
 	for (i = 0; i < state->path.active; i++) {
-		xfs_da_brelse(args->trans, state->path.blk[i].bp);
+		xfs_trans_brelse(args->trans, state->path.blk[i].bp);
 		state->path.blk[i].bp = NULL;
 	}
 	/*
 	 * Release the data block if we have it.
 	 */
 	if (state->extravalid && state->extrablk.bp) {
-		xfs_da_brelse(args->trans, state->extrablk.bp);
+		xfs_trans_brelse(args->trans, state->extrablk.bp);
 		state->extrablk.bp = NULL;
 	}
 	xfs_da_state_free(state);
@@ -1893,13 +1857,13 @@ xfs_dir2_node_replace(
 		 */
 		blk = &state->path.blk[state->path.active - 1];
 		ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
-		leaf = blk->bp->data;
+		leaf = blk->bp->b_addr;
 		lep = &leaf->ents[blk->index];
 		ASSERT(state->extravalid);
 		/*
 		 * Point to the data entry.
 		 */
-		hdr = state->extrablk.bp->data;
+		hdr = state->extrablk.bp->b_addr;
 		ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
 		dep = (xfs_dir2_data_entry_t *)
 		      ((char *)hdr +
@@ -1916,14 +1880,14 @@ xfs_dir2_node_replace(
 	 * Didn't find it, and we're holding a data block.  Drop it.
 	 */
 	else if (state->extravalid) {
-		xfs_da_brelse(args->trans, state->extrablk.bp);
+		xfs_trans_brelse(args->trans, state->extrablk.bp);
 		state->extrablk.bp = NULL;
 	}
 	/*
 	 * Release all the buffers in the cursor.
 	 */
 	for (i = 0; i < state->path.active; i++) {
-		xfs_da_brelse(args->trans, state->path.blk[i].bp);
+		xfs_trans_brelse(args->trans, state->path.blk[i].bp);
 		state->path.blk[i].bp = NULL;
 	}
 	xfs_da_state_free(state);
@@ -1940,7 +1904,7 @@ xfs_dir2_node_trim_free(
 	xfs_fileoff_t		fo,		/* free block number */
 	int			*rvalp)		/* out: did something */
 {
-	xfs_dabuf_t		*bp;		/* freespace buffer */
+	struct xfs_buf		*bp;		/* freespace buffer */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			error;		/* error return code */
 	xfs_dir2_free_t		*free;		/* freespace structure */
@@ -1965,13 +1929,13 @@ xfs_dir2_node_trim_free(
 	if (bp == NULL) {
 		return 0;
 	}
-	free = bp->data;
+	free = bp->b_addr;
 	ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
 	/*
 	 * If there are used entries, there's nothing to do.
 	 */
 	if (be32_to_cpu(free->hdr.nused) > 0) {
-		xfs_da_brelse(tp, bp);
+		xfs_trans_brelse(tp, bp);
 		*rvalp = 0;
 		return 0;
 	}
@@ -1987,7 +1951,7 @@ xfs_dir2_node_trim_free(
 		 * pieces.  This is the last block of an extent.
 		 */
 		ASSERT(error != ENOSPC);
-		xfs_da_brelse(tp, bp);
+		xfs_trans_brelse(tp, bp);
 		return error;
 	}
 	/*
diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h
index 067f403..3523d3e 100644
--- a/fs/xfs/xfs_dir2_priv.h
+++ b/fs/xfs/xfs_dir2_priv.h
@@ -25,7 +25,7 @@ extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
 extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
 				xfs_dir2_db_t *dbp);
 extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
-				struct xfs_dabuf *bp);
+				struct xfs_buf *bp);
 extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
 				const unsigned char *name, int len);
 
@@ -37,11 +37,11 @@ extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
 extern int xfs_dir2_block_removename(struct xfs_da_args *args);
 extern int xfs_dir2_block_replace(struct xfs_da_args *args);
 extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
-		struct xfs_dabuf *lbp, struct xfs_dabuf *dbp);
+		struct xfs_buf *lbp, struct xfs_buf *dbp);
 
 /* xfs_dir2_data.c */
 #ifdef DEBUG
-extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp);
+extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
 #else
 #define	xfs_dir2_data_check(dp,bp)
 #endif
@@ -51,43 +51,43 @@ xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
 extern void xfs_dir2_data_freescan(struct xfs_mount *mp,
 		struct xfs_dir2_data_hdr *hdr, int *loghead);
 extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
-		struct xfs_dabuf **bpp);
-extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp,
+		struct xfs_buf **bpp);
+extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp,
 		struct xfs_dir2_data_entry *dep);
 extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
-		struct xfs_dabuf *bp);
-extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp,
+		struct xfs_buf *bp);
+extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_buf *bp,
 		struct xfs_dir2_data_unused *dup);
-extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
+extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_buf *bp,
 		xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,
 		int *needlogp, int *needscanp);
-extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
+extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp,
 		struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset,
 		xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
 
 /* xfs_dir2_leaf.c */
 extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
-		struct xfs_dabuf *dbp);
+		struct xfs_buf *dbp);
 extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
 extern void xfs_dir2_leaf_compact(struct xfs_da_args *args,
-		struct xfs_dabuf *bp);
-extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp,
+		struct xfs_buf *bp);
+extern void xfs_dir2_leaf_compact_x1(struct xfs_buf *bp, int *indexp,
 		int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
 extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent,
 		size_t bufsize, xfs_off_t *offset, filldir_t filldir);
 extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno,
-		struct xfs_dabuf **bpp, int magic);
-extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp,
+		struct xfs_buf **bpp, int magic);
+extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp,
 		int first, int last);
 extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp,
-		struct xfs_dabuf *bp);
+		struct xfs_buf *bp);
 extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
 extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
 extern int xfs_dir2_leaf_replace(struct xfs_da_args *args);
 extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
-		struct xfs_dabuf *lbp);
+		struct xfs_buf *lbp);
 extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
-		struct xfs_dabuf *lbp, xfs_dir2_db_t db);
+		struct xfs_buf *lbp, xfs_dir2_db_t db);
 extern struct xfs_dir2_leaf_entry *
 xfs_dir2_leaf_find_entry(struct xfs_dir2_leaf *leaf, int index, int compact,
 		int lowstale, int highstale,
@@ -96,13 +96,13 @@ extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
 
 /* xfs_dir2_node.c */
 extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
-		struct xfs_dabuf *lbp);
-extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count);
-extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp,
+		struct xfs_buf *lbp);
+extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_buf *bp, int *count);
+extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp,
 		struct xfs_da_args *args, int *indexp,
 		struct xfs_da_state *state);
-extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp,
-		struct xfs_dabuf *leaf2_bp);
+extern int xfs_dir2_leafn_order(struct xfs_buf *leaf1_bp,
+		struct xfs_buf *leaf2_bp);
 extern int xfs_dir2_leafn_split(struct xfs_da_state *state,
 	struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk);
 extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
@@ -122,7 +122,7 @@ extern xfs_ino_t xfs_dir2_sfe_get_ino(struct xfs_dir2_sf_hdr *sfp,
 		struct xfs_dir2_sf_entry *sfep);
 extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
 		struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp);
-extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp,
+extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_buf *bp,
 		int size, xfs_dir2_sf_hdr_t *sfhp);
 extern int xfs_dir2_sf_addname(struct xfs_da_args *args);
 extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 19bf0c5..1b9fc3e 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -222,7 +222,7 @@ xfs_dir2_block_sfsize(
 int						/* error */
 xfs_dir2_block_to_sf(
 	xfs_da_args_t		*args,		/* operation arguments */
-	xfs_dabuf_t		*bp,		/* block buffer */
+	struct xfs_buf		*bp,
 	int			size,		/* shortform directory size */
 	xfs_dir2_sf_hdr_t	*sfhp)		/* shortform directory hdr */
 {
@@ -249,7 +249,7 @@ xfs_dir2_block_to_sf(
 	 * and add local data.
 	 */
 	hdr = kmem_alloc(mp->m_dirblksize, KM_SLEEP);
-	memcpy(hdr, bp->data, mp->m_dirblksize);
+	memcpy(hdr, bp->b_addr, mp->m_dirblksize);
 	logflags = XFS_ILOG_CORE;
 	if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) {
 		ASSERT(error != ENOSPC);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 425f6e9..07f70e1 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1491,13 +1491,9 @@ xfs_init_zones(void)
 	if (!xfs_da_state_zone)
 		goto out_destroy_btree_cur_zone;
 
-	xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
-	if (!xfs_dabuf_zone)
-		goto out_destroy_da_state_zone;
-
 	xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
 	if (!xfs_ifork_zone)
-		goto out_destroy_dabuf_zone;
+		goto out_destroy_da_state_zone;
 
 	xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
 	if (!xfs_trans_zone)
@@ -1560,8 +1556,6 @@ xfs_init_zones(void)
 	kmem_zone_destroy(xfs_trans_zone);
  out_destroy_ifork_zone:
 	kmem_zone_destroy(xfs_ifork_zone);
- out_destroy_dabuf_zone:
-	kmem_zone_destroy(xfs_dabuf_zone);
  out_destroy_da_state_zone:
 	kmem_zone_destroy(xfs_da_state_zone);
  out_destroy_btree_cur_zone:
@@ -1589,7 +1583,6 @@ xfs_destroy_zones(void)
 	kmem_zone_destroy(xfs_log_item_desc_zone);
 	kmem_zone_destroy(xfs_trans_zone);
 	kmem_zone_destroy(xfs_ifork_zone);
-	kmem_zone_destroy(xfs_dabuf_zone);
 	kmem_zone_destroy(xfs_da_state_zone);
 	kmem_zone_destroy(xfs_btree_cur_zone);
 	kmem_zone_destroy(xfs_bmap_free_item_zone);
-- 
cgit v0.10.2


From 9b73bd7b61f320ffe7bda0126592ccf836d7ef90 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Fri, 22 Jun 2012 18:50:15 +1000
Subject: xfs: factor buffer reading from xfs_dir2_leaf_getdents

The buffer reading code in xfs_dir2_leaf_getdents is complex and difficult to
follow due to the readahead and all the context it carries. It is also badly
indented, which makes it hard to read. Factor it out into a separate function
to make it easier to understand and optimise in future patches.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 69accf6..0b29625 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -759,6 +759,218 @@ xfs_dir2_leaf_compact_x1(
 	*highstalep = highstale;
 }
 
+struct xfs_dir2_leaf_map_info {
+	xfs_extlen_t	map_blocks;	/* number of fsbs in map */
+	xfs_dablk_t	map_off;	/* last mapped file offset */
+	int		map_size;	/* total entries in *map */
+	int		map_valid;	/* valid entries in *map */
+	int		nmap;		/* mappings to ask xfs_bmapi */
+	xfs_dir2_db_t	curdb;		/* db for current block */
+	int		ra_current;	/* number of read-ahead blks */
+	int		ra_index;	/* *map index for read-ahead */
+	int		ra_offset;	/* map entry offset for ra */
+	int		ra_want;	/* readahead count wanted */
+	struct xfs_bmbt_irec map[];	/* map vector for blocks */
+};
+
+STATIC int
+xfs_dir2_leaf_readbuf(
+	struct xfs_inode	*dp,
+	size_t			bufsize,
+	struct xfs_dir2_leaf_map_info *mip,
+	xfs_dir2_off_t		*curoff,
+	struct xfs_buf		**bpp)
+{
+	struct xfs_mount	*mp = dp->i_mount;
+	struct xfs_buf		*bp = *bpp;
+	struct xfs_bmbt_irec	*map = mip->map;
+	int			error = 0;
+	int			length;
+	int			i;
+	int			j;
+
+	/*
+	 * If we have a buffer, we need to release it and
+	 * take it out of the mapping.
+	 */
+
+	if (bp) {
+		xfs_trans_brelse(NULL, bp);
+		bp = NULL;
+		mip->map_blocks -= mp->m_dirblkfsbs;
+		/*
+		 * Loop to get rid of the extents for the
+		 * directory block.
+		 */
+		for (i = mp->m_dirblkfsbs; i > 0; ) {
+			j = min_t(int, map->br_blockcount, i);
+			map->br_blockcount -= j;
+			map->br_startblock += j;
+			map->br_startoff += j;
+			/*
+			 * If mapping is done, pitch it from
+			 * the table.
+			 */
+			if (!map->br_blockcount && --mip->map_valid)
+				memmove(&map[0], &map[1],
+					sizeof(map[0]) * mip->map_valid);
+			i -= j;
+		}
+	}
+
+	/*
+	 * Recalculate the readahead blocks wanted.
+	 */
+	mip->ra_want = howmany(bufsize + mp->m_dirblksize,
+			       mp->m_sb.sb_blocksize) - 1;
+	ASSERT(mip->ra_want >= 0);
+
+	/*
+	 * If we don't have as many as we want, and we haven't
+	 * run out of data blocks, get some more mappings.
+	 */
+	if (1 + mip->ra_want > mip->map_blocks &&
+	    mip->map_off < xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
+		/*
+		 * Get more bmaps, fill in after the ones
+		 * we already have in the table.
+		 */
+		mip->nmap = mip->map_size - mip->map_valid;
+		error = xfs_bmapi_read(dp, mip->map_off,
+				xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET) -
+								mip->map_off,
+				&map[mip->map_valid], &mip->nmap, 0);
+
+		/*
+		 * Don't know if we should ignore this or try to return an
+		 * error.  The trouble with returning errors is that readdir
+		 * will just stop without actually passing the error through.
+		 */
+		if (error)
+			goto out;	/* XXX */
+
+		/*
+		 * If we got all the mappings we asked for, set the final map
+		 * offset based on the last bmap value received.  Otherwise,
+		 * we've reached the end.
+		 */
+		if (mip->nmap == mip->map_size - mip->map_valid) {
+			i = mip->map_valid + mip->nmap - 1;
+			mip->map_off = map[i].br_startoff + map[i].br_blockcount;
+		} else
+			mip->map_off = xfs_dir2_byte_to_da(mp,
+							XFS_DIR2_LEAF_OFFSET);
+
+		/*
+		 * Look for holes in the mapping, and eliminate them.  Count up
+		 * the valid blocks.
+		 */
+		for (i = mip->map_valid; i < mip->map_valid + mip->nmap; ) {
+			if (map[i].br_startblock == HOLESTARTBLOCK) {
+				mip->nmap--;
+				length = mip->map_valid + mip->nmap - i;
+				if (length)
+					memmove(&map[i], &map[i + 1],
+						sizeof(map[i]) * length);
+			} else {
+				mip->map_blocks += map[i].br_blockcount;
+				i++;
+			}
+		}
+		mip->map_valid += mip->nmap;
+	}
+
+	/*
+	 * No valid mappings, so no more data blocks.
+	 */
+	if (!mip->map_valid) {
+		*curoff = xfs_dir2_da_to_byte(mp, mip->map_off);
+		goto out;
+	}
+
+	/*
+	 * Read the directory block starting at the first mapping.
+	 */
+	mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
+	error = xfs_da_read_buf(NULL, dp, map->br_startoff,
+			map->br_blockcount >= mp->m_dirblkfsbs ?
+			    XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1,
+			&bp, XFS_DATA_FORK);
+
+	/*
+	 * Should just skip over the data block instead of giving up.
+	 */
+	if (error)
+		goto out;	/* XXX */
+
+	/*
+	 * Adjust the current amount of read-ahead: we just read a block that
+	 * was previously ra.
+	 */
+	if (mip->ra_current)
+		mip->ra_current -= mp->m_dirblkfsbs;
+
+	/*
+	 * Do we need more readahead?
+	 */
+	for (mip->ra_index = mip->ra_offset = i = 0;
+	     mip->ra_want > mip->ra_current && i < mip->map_blocks;
+	     i += mp->m_dirblkfsbs) {
+		ASSERT(mip->ra_index < mip->map_valid);
+		/*
+		 * Read-ahead a contiguous directory block.
+		 */
+		if (i > mip->ra_current &&
+		    map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) {
+			xfs_buf_readahead(mp->m_ddev_targp,
+				XFS_FSB_TO_DADDR(mp,
+					map[mip->ra_index].br_startblock +
+							mip->ra_offset),
+				(int)BTOBB(mp->m_dirblksize));
+			mip->ra_current = i;
+		}
+
+		/*
+		 * Read-ahead a non-contiguous directory block.  This doesn't
+		 * use our mapping, but this is a very rare case.
+		 */
+		else if (i > mip->ra_current) {
+			xfs_da_reada_buf(NULL, dp,
+					map[mip->ra_index].br_startoff +
+							mip->ra_offset,
+					XFS_DATA_FORK);
+			mip->ra_current = i;
+		}
+
+		/*
+		 * Advance offset through the mapping table.
+		 */
+		for (j = 0; j < mp->m_dirblkfsbs; j++) {
+			/*
+			 * The rest of this extent but not more than a dir
+			 * block.
+			 */
+			length = min_t(int, mp->m_dirblkfsbs,
+					map[mip->ra_index].br_blockcount -
+							mip->ra_offset);
+			j += length;
+			mip->ra_offset += length;
+
+			/*
+			 * Advance to the next mapping if this one is used up.
+			 */
+			if (mip->ra_offset == map[mip->ra_index].br_blockcount) {
+				mip->ra_offset = 0;
+				mip->ra_index++;
+			}
+		}
+	}
+
+out:
+	*bpp = bp;
+	return error;
+}
+
 /*
  * Getdents (readdir) for leaf and node directories.
  * This reads the data blocks only, so is the same for both forms.
@@ -771,30 +983,18 @@ xfs_dir2_leaf_getdents(
 	xfs_off_t		*offset,
 	filldir_t		filldir)
 {
-	struct xfs_buf		*bp;		/* data block buffer */
-	int			byteoff;	/* offset in current block */
-	xfs_dir2_db_t		curdb;		/* db for current block */
-	xfs_dir2_off_t		curoff;		/* current overall offset */
+	struct xfs_buf		*bp = NULL;	/* data block buffer */
 	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
 	xfs_dir2_data_entry_t	*dep;		/* data entry */
 	xfs_dir2_data_unused_t	*dup;		/* unused entry */
 	int			error = 0;	/* error return value */
-	int			i;		/* temporary loop index */
-	int			j;		/* temporary loop index */
 	int			length;		/* temporary length value */
-	xfs_bmbt_irec_t		*map;		/* map vector for blocks */
-	xfs_extlen_t		map_blocks;	/* number of fsbs in map */
-	xfs_dablk_t		map_off;	/* last mapped file offset */
-	int			map_size;	/* total entries in *map */
-	int			map_valid;	/* valid entries in *map */
 	xfs_mount_t		*mp;		/* filesystem mount point */
+	int			byteoff;	/* offset in current block */
+	xfs_dir2_off_t		curoff;		/* current overall offset */
 	xfs_dir2_off_t		newoff;		/* new curoff after new blk */
-	int			nmap;		/* mappings to ask xfs_bmapi */
 	char			*ptr = NULL;	/* pointer to current data */
-	int			ra_current;	/* number of read-ahead blks */
-	int			ra_index;	/* *map index for read-ahead */
-	int			ra_offset;	/* map entry offset for ra */
-	int			ra_want;	/* readahead count wanted */
+	struct xfs_dir2_leaf_map_info *map_info;
 
 	/*
 	 * If the offset is at or past the largest allowed value,
@@ -810,10 +1010,12 @@ xfs_dir2_leaf_getdents(
 	 * buffer size, the directory block size, and the filesystem
 	 * block size.
 	 */
-	map_size = howmany(bufsize + mp->m_dirblksize, mp->m_sb.sb_blocksize);
-	map = kmem_alloc(map_size * sizeof(*map), KM_SLEEP);
-	map_valid = ra_index = ra_offset = ra_current = map_blocks = 0;
-	bp = NULL;
+	length = howmany(bufsize + mp->m_dirblksize,
+				     mp->m_sb.sb_blocksize);
+	map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
+				(length * sizeof(struct xfs_bmbt_irec)),
+			       KM_SLEEP);
+	map_info->map_size = length;
 
 	/*
 	 * Inside the loop we keep the main offset value as a byte offset
@@ -825,7 +1027,9 @@ xfs_dir2_leaf_getdents(
 	 * Force this conversion through db so we truncate the offset
 	 * down to get the start of the data block.
 	 */
-	map_off = xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, curoff));
+	map_info->map_off = xfs_dir2_db_to_da(mp,
+					      xfs_dir2_byte_to_db(mp, curoff));
+
 	/*
 	 * Loop over directory entries until we reach the end offset.
 	 * Get more blocks and readahead as necessary.
@@ -836,190 +1040,16 @@ xfs_dir2_leaf_getdents(
 		 * current buffer, need to get another one.
 		 */
 		if (!bp || ptr >= (char *)bp->b_addr + mp->m_dirblksize) {
-			/*
-			 * If we have a buffer, we need to release it and
-			 * take it out of the mapping.
-			 */
-			if (bp) {
-				xfs_trans_brelse(NULL, bp);
-				bp = NULL;
-				map_blocks -= mp->m_dirblkfsbs;
-				/*
-				 * Loop to get rid of the extents for the
-				 * directory block.
-				 */
-				for (i = mp->m_dirblkfsbs; i > 0; ) {
-					j = MIN((int)map->br_blockcount, i);
-					map->br_blockcount -= j;
-					map->br_startblock += j;
-					map->br_startoff += j;
-					/*
-					 * If mapping is done, pitch it from
-					 * the table.
-					 */
-					if (!map->br_blockcount && --map_valid)
-						memmove(&map[0], &map[1],
-							sizeof(map[0]) *
-							map_valid);
-					i -= j;
-				}
-			}
-			/*
-			 * Recalculate the readahead blocks wanted.
-			 */
-			ra_want = howmany(bufsize + mp->m_dirblksize,
-					  mp->m_sb.sb_blocksize) - 1;
-			ASSERT(ra_want >= 0);
 
-			/*
-			 * If we don't have as many as we want, and we haven't
-			 * run out of data blocks, get some more mappings.
-			 */
-			if (1 + ra_want > map_blocks &&
-			    map_off <
-			    xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
-				/*
-				 * Get more bmaps, fill in after the ones
-				 * we already have in the table.
-				 */
-				nmap = map_size - map_valid;
-				error = xfs_bmapi_read(dp, map_off,
-					xfs_dir2_byte_to_da(mp,
-						XFS_DIR2_LEAF_OFFSET) - map_off,
-					&map[map_valid], &nmap, 0);
-				/*
-				 * Don't know if we should ignore this or
-				 * try to return an error.
-				 * The trouble with returning errors
-				 * is that readdir will just stop without
-				 * actually passing the error through.
-				 */
-				if (error)
-					break;	/* XXX */
-				/*
-				 * If we got all the mappings we asked for,
-				 * set the final map offset based on the
-				 * last bmap value received.
-				 * Otherwise, we've reached the end.
-				 */
-				if (nmap == map_size - map_valid)
-					map_off =
-					map[map_valid + nmap - 1].br_startoff +
-					map[map_valid + nmap - 1].br_blockcount;
-				else
-					map_off =
-						xfs_dir2_byte_to_da(mp,
-							XFS_DIR2_LEAF_OFFSET);
-				/*
-				 * Look for holes in the mapping, and
-				 * eliminate them.  Count up the valid blocks.
-				 */
-				for (i = map_valid; i < map_valid + nmap; ) {
-					if (map[i].br_startblock ==
-					    HOLESTARTBLOCK) {
-						nmap--;
-						length = map_valid + nmap - i;
-						if (length)
-							memmove(&map[i],
-								&map[i + 1],
-								sizeof(map[i]) *
-								length);
-					} else {
-						map_blocks +=
-							map[i].br_blockcount;
-						i++;
-					}
-				}
-				map_valid += nmap;
-			}
-			/*
-			 * No valid mappings, so no more data blocks.
-			 */
-			if (!map_valid) {
-				curoff = xfs_dir2_da_to_byte(mp, map_off);
+			error = xfs_dir2_leaf_readbuf(dp, bufsize, map_info,
+						      &curoff, &bp);
+			if (error || !map_info->map_valid)
 				break;
-			}
-			/*
-			 * Read the directory block starting at the first
-			 * mapping.
-			 */
-			curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
-			error = xfs_da_read_buf(NULL, dp, map->br_startoff,
-				map->br_blockcount >= mp->m_dirblkfsbs ?
-				    XFS_FSB_TO_DADDR(mp, map->br_startblock) :
-				    -1,
-				&bp, XFS_DATA_FORK);
-			/*
-			 * Should just skip over the data block instead
-			 * of giving up.
-			 */
-			if (error)
-				break;	/* XXX */
-			/*
-			 * Adjust the current amount of read-ahead: we just
-			 * read a block that was previously ra.
-			 */
-			if (ra_current)
-				ra_current -= mp->m_dirblkfsbs;
-			/*
-			 * Do we need more readahead?
-			 */
-			for (ra_index = ra_offset = i = 0;
-			     ra_want > ra_current && i < map_blocks;
-			     i += mp->m_dirblkfsbs) {
-				ASSERT(ra_index < map_valid);
-				/*
-				 * Read-ahead a contiguous directory block.
-				 */
-				if (i > ra_current &&
-				    map[ra_index].br_blockcount >=
-				    mp->m_dirblkfsbs) {
-					xfs_buf_readahead(mp->m_ddev_targp,
-						XFS_FSB_TO_DADDR(mp,
-						   map[ra_index].br_startblock +
-						   ra_offset),
-						(int)BTOBB(mp->m_dirblksize));
-					ra_current = i;
-				}
-				/*
-				 * Read-ahead a non-contiguous directory block.
-				 * This doesn't use our mapping, but this
-				 * is a very rare case.
-				 */
-				else if (i > ra_current) {
-					(void)xfs_da_reada_buf(NULL, dp,
-						map[ra_index].br_startoff +
-						ra_offset, XFS_DATA_FORK);
-					ra_current = i;
-				}
-				/*
-				 * Advance offset through the mapping table.
-				 */
-				for (j = 0; j < mp->m_dirblkfsbs; j++) {
-					/*
-					 * The rest of this extent but not
-					 * more than a dir block.
-					 */
-					length = MIN(mp->m_dirblkfsbs,
-						(int)(map[ra_index].br_blockcount -
-						ra_offset));
-					j += length;
-					ra_offset += length;
-					/*
-					 * Advance to the next mapping if
-					 * this one is used up.
-					 */
-					if (ra_offset ==
-					    map[ra_index].br_blockcount) {
-						ra_offset = 0;
-						ra_index++;
-					}
-				}
-			}
+
 			/*
 			 * Having done a read, we need to set a new offset.
 			 */
-			newoff = xfs_dir2_db_off_to_byte(mp, curdb, 0);
+			newoff = xfs_dir2_db_off_to_byte(mp, map_info->curdb, 0);
 			/*
 			 * Start of the current block.
 			 */
@@ -1030,7 +1060,7 @@ xfs_dir2_leaf_getdents(
 			 */
 			else if (curoff > newoff)
 				ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
-				       curdb);
+				       map_info->curdb);
 			hdr = bp->b_addr;
 			xfs_dir2_data_check(dp, bp);
 			/*
@@ -1113,7 +1143,7 @@ xfs_dir2_leaf_getdents(
 		*offset = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
 	else
 		*offset = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
-	kmem_free(map);
+	kmem_free(map_info);
 	if (bp)
 		xfs_trans_brelse(NULL, bp);
 	return error;
-- 
cgit v0.10.2


From 1f432a887e9a5a5c25be6ac72b5da13652c8bed3 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 12 Jul 2012 07:40:42 +1000
Subject: xfs: really fix the cursor leak in xfs_alloc_ag_vextent_near

The current cursor is reallocated when retrying the allocation, so
the existing cursor needs to be destroyed in both the restart and
the failure cases.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Tested-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 9d1aeb7..f654f51 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1074,13 +1074,13 @@ restart:
 	 * If we couldn't get anything, give up.
 	 */
 	if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
+		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+
 		if (!forced++) {
 			trace_xfs_alloc_near_busy(args);
 			xfs_log_force(args->mp, XFS_LOG_SYNC);
 			goto restart;
 		}
-
-		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 		trace_xfs_alloc_size_neither(args);
 		args->agbno = NULLAGBLOCK;
 		return 0;
-- 
cgit v0.10.2


From eb71a12e411fe065f8663e12a8d81d561f9502ee Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 12 Jul 2012 07:40:43 +1000
Subject: xfs: don't defer metadata allocation to the workqueue

Almost all metadata allocations come from shallow stack usage
situations. Avoid the overhead of switching the allocation to a
workqueue as we are not in danger of running out of stack when
making these allocations. Metadata allocations are already marked
through the args that are passed down, so this is trivial to do.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reported-by: Mel Gorman <mgorman@suse.de>
Tested-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index f654f51..4f33c32 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2434,13 +2434,22 @@ xfs_alloc_vextent_worker(
 	current_restore_flags_nested(&pflags, PF_FSTRANS);
 }
 
-
-int				/* error */
+/*
+ * Data allocation requests often come in with little stack to work on. Push
+ * them off to a worker thread so there is lots of stack to use. Metadata
+ * requests, OTOH, are generally from low stack usage paths, so avoid the
+ * context switch overhead here.
+ */
+int
 xfs_alloc_vextent(
-	xfs_alloc_arg_t	*args)	/* allocation argument structure */
+	struct xfs_alloc_arg	*args)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 
+	if (!args->userdata)
+		return __xfs_alloc_vextent(args);
+
+
 	args->done = &done;
 	INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
 	queue_work(xfs_alloc_wq, &args->work);
-- 
cgit v0.10.2


From 08023d6dbe840dc4271805a9ea376fcbdee9f744 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 2 Jul 2012 06:00:04 -0400
Subject: xfs: prevent recursion in xfs_buf_iorequest

If the b_iodone handler is run in calling context in xfs_buf_iorequest we
can run into a recursion where xfs_buf_iodone_callbacks keeps calling back
into xfs_buf_iorequest because an I/O error happened, which keeps calling
back into xfs_buf_iorequest.  This chain will usually not take long
because the filesystem gets shut down because of log I/O errors, but even
over a short time it can cause stack overflows if run on the same context.

As a short term workaround make sure we always call the iodone handler in
workqueue context.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 39c5d76..d1edfa1 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1355,7 +1355,7 @@ xfs_buf_iorequest(
 	 */
 	atomic_set(&bp->b_io_remaining, 1);
 	_xfs_buf_ioapply(bp);
-	_xfs_buf_ioend(bp, 0);
+	_xfs_buf_ioend(bp, 1);
 
 	xfs_buf_rele(bp);
 }
-- 
cgit v0.10.2


From a2dcf5df5f3813a44423d4a5026666e751ec00dd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Fri, 13 Jul 2012 02:24:10 -0400
Subject: xfs: do not call xfs_bdstrat_cb in xfs_buf_iodone_callbacks

xfs_bdstrat_cb only adds a check for a shutdown filesystem over
xfs_buf_iorequest, but xfs_buf_iodone_callbacks just checked for a shut down
filesystem a little earlier.  In addition the shutdown handling in
xfs_bdstrat_cb is not very suitable for this caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index d1edfa1..d7a9dd7 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1049,27 +1049,6 @@ xfs_buf_ioerror_alert(
 		(__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length);
 }
 
-int
-xfs_bwrite(
-	struct xfs_buf		*bp)
-{
-	int			error;
-
-	ASSERT(xfs_buf_islocked(bp));
-
-	bp->b_flags |= XBF_WRITE;
-	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
-
-	xfs_bdstrat_cb(bp);
-
-	error = xfs_buf_iowait(bp);
-	if (error) {
-		xfs_force_shutdown(bp->b_target->bt_mount,
-				   SHUTDOWN_META_IO_ERROR);
-	}
-	return error;
-}
-
 /*
  * Called when we want to stop a buffer from getting written or read.
  * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
@@ -1139,14 +1118,7 @@ xfs_bioerror_relse(
 	return EIO;
 }
 
-
-/*
- * All xfs metadata buffers except log state machine buffers
- * get this attached as their b_bdstrat callback function.
- * This is so that we can catch a buffer
- * after prematurely unpinning it to forcibly shutdown the filesystem.
- */
-int
+STATIC int
 xfs_bdstrat_cb(
 	struct xfs_buf	*bp)
 {
@@ -1167,6 +1139,27 @@ xfs_bdstrat_cb(
 	return 0;
 }
 
+int
+xfs_bwrite(
+	struct xfs_buf		*bp)
+{
+	int			error;
+
+	ASSERT(xfs_buf_islocked(bp));
+
+	bp->b_flags |= XBF_WRITE;
+	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
+
+	xfs_bdstrat_cb(bp);
+
+	error = xfs_buf_iowait(bp);
+	if (error) {
+		xfs_force_shutdown(bp->b_target->bt_mount,
+				   SHUTDOWN_META_IO_ERROR);
+	}
+	return error;
+}
+
 /*
  * Wrapper around bdstrat so that we can stop data from going to disk in case
  * we are shutting down the filesystem.  Typically user data goes thru this
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index aa96bd4..d03b73b 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -250,7 +250,6 @@ extern void xfs_buf_unlock(xfs_buf_t *);
 extern int xfs_bwrite(struct xfs_buf *bp);
 
 extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
-extern int xfs_bdstrat_cb(struct xfs_buf *);
 
 extern void xfs_buf_ioend(xfs_buf_t *,	int);
 extern void xfs_buf_ioerror(xfs_buf_t *, int);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index e4a6e4b..a8d0ed9 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1101,7 +1101,7 @@ xfs_buf_iodone_callbacks(
 
 		if (!XFS_BUF_ISSTALE(bp)) {
 			bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE;
-			xfs_bdstrat_cb(bp);
+			xfs_buf_iorequest(bp);
 		} else {
 			xfs_buf_relse(bp);
 		}
-- 
cgit v0.10.2


From 96ee34be7a0965a117080a28b2c0cc6d4ee6adb1 Mon Sep 17 00:00:00 2001
From: Chen Baozi <baozich@gmail.com>
Date: Sat, 14 Jul 2012 03:38:13 +0800
Subject: xfs: fix comment typo of struct xfs_da_blkinfo.

Fix a trivial typo in the comment, where "It" was written as "Is".

Signed-off-by: Chen Baozi <baozich@gmail.com>
Reviewed-by: Ben Myers <bpm@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 9f37aa0..132adaf 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -32,7 +32,7 @@ struct zone;
 /*
  * This structure is common to both leaf nodes and non-leaf nodes in the Btree.
  *
- * Is is used to manage a doubly linked list of all blocks at the same
+ * It is used to manage a doubly linked list of all blocks at the same
  * level in the Btree, and to identify which type of block this is.
  */
 #define XFS_DA_NODE_MAGIC	0xfebe	/* magic number: non-leaf blocks */
-- 
cgit v0.10.2


From 69ff2826117f1cde9a2491be57a578212bca551e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 6 Jun 2012 17:01:28 -0400
Subject: xfs: implement ->update_time

Use this new method to replace our hacky use of ->dirty_inode.  An additional
benefit is that we can now propagate errors up the stack.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 1a25fd8..5439c3f 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -897,6 +897,47 @@ xfs_vn_setattr(
 	return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0);
 }
 
+STATIC int
+xfs_vn_update_time(
+	struct inode		*inode,
+	struct timespec		*now,
+	int			flags)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	int			error;
+
+	trace_xfs_update_time(ip);
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+	error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+	if (error) {
+		xfs_trans_cancel(tp, 0);
+		return -error;
+	}
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	if (flags & S_CTIME) {
+		inode->i_ctime = *now;
+		ip->i_d.di_ctime.t_sec = (__int32_t)now->tv_sec;
+		ip->i_d.di_ctime.t_nsec = (__int32_t)now->tv_nsec;
+	}
+	if (flags & S_MTIME) {
+		inode->i_mtime = *now;
+		ip->i_d.di_mtime.t_sec = (__int32_t)now->tv_sec;
+		ip->i_d.di_mtime.t_nsec = (__int32_t)now->tv_nsec;
+	}
+	if (flags & S_ATIME) {
+		inode->i_atime = *now;
+		ip->i_d.di_atime.t_sec = (__int32_t)now->tv_sec;
+		ip->i_d.di_atime.t_nsec = (__int32_t)now->tv_nsec;
+	}
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
+	return -xfs_trans_commit(tp, 0);
+}
+
 #define XFS_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
 
 /*
@@ -991,6 +1032,7 @@ static const struct inode_operations xfs_inode_operations = {
 	.removexattr		= generic_removexattr,
 	.listxattr		= xfs_vn_listxattr,
 	.fiemap			= xfs_vn_fiemap,
+	.update_time		= xfs_vn_update_time,
 };
 
 static const struct inode_operations xfs_dir_inode_operations = {
@@ -1016,6 +1058,7 @@ static const struct inode_operations xfs_dir_inode_operations = {
 	.getxattr		= generic_getxattr,
 	.removexattr		= generic_removexattr,
 	.listxattr		= xfs_vn_listxattr,
+	.update_time		= xfs_vn_update_time,
 };
 
 static const struct inode_operations xfs_dir_ci_inode_operations = {
@@ -1041,6 +1084,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
 	.getxattr		= generic_getxattr,
 	.removexattr		= generic_removexattr,
 	.listxattr		= xfs_vn_listxattr,
+	.update_time		= xfs_vn_update_time,
 };
 
 static const struct inode_operations xfs_symlink_inode_operations = {
@@ -1054,6 +1098,7 @@ static const struct inode_operations xfs_symlink_inode_operations = {
 	.getxattr		= generic_getxattr,
 	.removexattr		= generic_removexattr,
 	.listxattr		= xfs_vn_listxattr,
+	.update_time		= xfs_vn_update_time,
 };
 
 STATIC void
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 07f70e1..cb2deb1 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -868,61 +868,6 @@ xfs_fs_inode_init_once(
 		     "xfsino", ip->i_ino);
 }
 
-/*
- * This is called by the VFS when dirtying inode metadata.  This can happen
- * for a few reasons, but we only care about timestamp updates, given that
- * we handled the rest ourselves.  In theory no other calls should happen,
- * but for example generic_write_end() keeps dirtying the inode after
- * updating i_size.  Thus we check that the flags are exactly I_DIRTY_SYNC,
- * and skip this call otherwise.
- *
- * We'll hopefull get a different method just for updating timestamps soon,
- * at which point this hack can go away, and maybe we'll also get real
- * error handling here.
- */
-STATIC void
-xfs_fs_dirty_inode(
-	struct inode		*inode,
-	int			flags)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_trans	*tp;
-	int			error;
-
-	if (flags != I_DIRTY_SYNC)
-		return;
-
-	trace_xfs_dirty_inode(ip);
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-	error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
-	if (error) {
-		xfs_trans_cancel(tp, 0);
-		goto trouble;
-	}
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	/*
-	 * Grab all the latest timestamps from the Linux inode.
-	 */
-	ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
-	ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
-	ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
-	ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
-	ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
-	ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
-
-	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
-	error = xfs_trans_commit(tp, 0);
-	if (error)
-		goto trouble;
-	return;
-
-trouble:
-	xfs_warn(mp, "failed to update timestamps for inode 0x%llx", ip->i_ino);
-}
-
 STATIC void
 xfs_fs_evict_inode(
 	struct inode		*inode)
@@ -1436,7 +1381,6 @@ xfs_fs_free_cached_objects(
 static const struct super_operations xfs_super_operations = {
 	.alloc_inode		= xfs_fs_alloc_inode,
 	.destroy_inode		= xfs_fs_destroy_inode,
-	.dirty_inode		= xfs_fs_dirty_inode,
 	.evict_inode		= xfs_fs_evict_inode,
 	.drop_inode		= xfs_fs_drop_inode,
 	.put_super		= xfs_fs_put_super,
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index caf5dab..e5795dd 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -578,8 +578,8 @@ DEFINE_INODE_EVENT(xfs_ioctl_setattr);
 DEFINE_INODE_EVENT(xfs_dir_fsync);
 DEFINE_INODE_EVENT(xfs_file_fsync);
 DEFINE_INODE_EVENT(xfs_destroy_inode);
-DEFINE_INODE_EVENT(xfs_dirty_inode);
 DEFINE_INODE_EVENT(xfs_evict_inode);
+DEFINE_INODE_EVENT(xfs_update_time);
 
 DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
 DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
-- 
cgit v0.10.2


From 6b7a03f03a2f8b1629133e35729eba4727fae3cc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 3 Jul 2012 12:20:00 -0400
Subject: xfs: handle EOF correctly in xfs_vm_writepage

We need to zero out the part of a page which is beyond EOF before setting
uptodate, otherwise a mapped read or write will see non-zero data beyond EOF.

Based on the code in fs/buffer.c and the following ext4 commit:

  ext4: handle EOF correctly in ext4_bio_write_page()

And yes, I wish we had a good test case for it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 84e3725..91d77ac 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -927,11 +927,26 @@ xfs_vm_writepage(
 	end_index = offset >> PAGE_CACHE_SHIFT;
 	last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
 	if (page->index >= end_index) {
-		if ((page->index >= end_index + 1) ||
-		    !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
+		unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1);
+
+		/*
+		 * Just skip the page if it is fully outside i_size, e.g. due
+		 * to a truncate operation that is in progress.
+		 */
+		if (page->index >= end_index + 1 || offset_into_page == 0) {
 			unlock_page(page);
 			return 0;
 		}
+
+		/*
+		 * The page straddles i_size.  It must be zeroed out on each
+		 * and every writepage invocation because it may be mmapped.
+		 * "A file is mapped in multiples of the page size.  For a file
+		 * that is not a multiple of the  page size, the remaining
+		 * memory is zeroed when mapped, and writes to that region are
+		 * not written out to the file."
+		 */
+		zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE);
 	}
 
 	end_offset = min_t(unsigned long long,
-- 
cgit v0.10.2


From 475ee413f34165f8e6fcd7fbff3a4da1dce48c99 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 3 Jul 2012 12:21:22 -0400
Subject: xfs: merge xfs_itobp into xfs_imap_to_bp

All callers of xfs_imap_to_bp want the dinode pointer, so let's calculate it
inside xfs_imap_to_bp.  Once that is done xfs_itobp becomes a fairly pointless
wrapper which can be replaced with direct calls to xfs_imap_to_bp.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 257f3c4..34c985d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -132,23 +132,28 @@ xfs_inobp_check(
 #endif
 
 /*
- * Find the buffer associated with the given inode map
- * We do basic validation checks on the buffer once it has been
- * retrieved from disk.
+ * This routine is called to map an inode to the buffer containing the on-disk
+ * version of the inode.  It returns a pointer to the buffer containing the
+ * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
+ * pointer to the on-disk inode within that buffer.
+ *
+ * If a non-zero error is returned, then the contents of bpp and dipp are
+ * undefined.
  */
-STATIC int
+int
 xfs_imap_to_bp(
-	xfs_mount_t	*mp,
-	xfs_trans_t	*tp,
-	struct xfs_imap	*imap,
-	xfs_buf_t	**bpp,
-	uint		buf_flags,
-	uint		iget_flags)
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	struct xfs_imap		*imap,
+	struct xfs_dinode	**dipp,
+	struct xfs_buf		**bpp,
+	uint			buf_flags,
+	uint			iget_flags)
 {
-	int		error;
-	int		i;
-	int		ni;
-	xfs_buf_t	*bp;
+	struct xfs_buf		*bp;
+	int			error;
+	int			i;
+	int			ni;
 
 	buf_flags |= XBF_UNMAPPED;
 	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
@@ -189,8 +194,8 @@ xfs_imap_to_bp(
 				xfs_trans_brelse(tp, bp);
 				return XFS_ERROR(EINVAL);
 			}
-			XFS_CORRUPTION_ERROR("xfs_imap_to_bp",
-						XFS_ERRLEVEL_HIGH, mp, dip);
+			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
+					     mp, dip);
 #ifdef DEBUG
 			xfs_emerg(mp,
 				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
@@ -204,7 +209,9 @@ xfs_imap_to_bp(
 	}
 
 	xfs_inobp_check(mp, bp);
+
 	*bpp = bp;
+	*dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
 	return 0;
 }
 
@@ -240,63 +247,15 @@ xfs_inotobp(
 	if (error)
 		return error;
 
-	error = xfs_imap_to_bp(mp, tp, &imap, &bp, 0, imap_flags);
+	error = xfs_imap_to_bp(mp, tp, &imap, dipp, &bp, 0, imap_flags);
 	if (error)
 		return error;
 
-	*dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
 	*bpp = bp;
 	*offset = imap.im_boffset;
 	return 0;
 }
 
-
-/*
- * This routine is called to map an inode to the buffer containing
- * the on-disk version of the inode.  It returns a pointer to the
- * buffer containing the on-disk inode in the bpp parameter, and in
- * the dip parameter it returns a pointer to the on-disk inode within
- * that buffer.
- *
- * If a non-zero error is returned, then the contents of bpp and
- * dipp are undefined.
- *
- * The inode is expected to already been mapped to its buffer and read
- * in once, thus we can use the mapping information stored in the inode
- * rather than calling xfs_imap().  This allows us to avoid the overhead
- * of looking at the inode btree for small block file systems
- * (see xfs_imap()).
- */
-int
-xfs_itobp(
-	xfs_mount_t	*mp,
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip,
-	xfs_dinode_t	**dipp,
-	xfs_buf_t	**bpp,
-	uint		buf_flags)
-{
-	xfs_buf_t	*bp;
-	int		error;
-
-	ASSERT(ip->i_imap.im_blkno != 0);
-
-	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, buf_flags, 0);
-	if (error)
-		return error;
-
-	if (!bp) {
-		ASSERT(buf_flags & XBF_TRYLOCK);
-		ASSERT(tp == NULL);
-		*bpp = NULL;
-		return EAGAIN;
-	}
-
-	*dipp = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
-	*bpp = bp;
-	return 0;
-}
-
 /*
  * Move inode type and inode format specific information from the
  * on-disk inode to the in-core inode.  For fifos, devs, and sockets
@@ -796,10 +755,9 @@ xfs_iread(
 	/*
 	 * Get pointers to the on-disk inode and the buffer containing it.
 	 */
-	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, 0, iget_flags);
+	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
 	if (error)
 		return error;
-	dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
 
 	/*
 	 * If we got something that isn't an inode it means someone
@@ -876,7 +834,7 @@ xfs_iread(
 	/*
 	 * Use xfs_trans_brelse() to release the buffer containing the
 	 * on-disk inode, because it was acquired with xfs_trans_read_buf()
-	 * in xfs_itobp() above.  If tp is NULL, this is just a normal
+	 * in xfs_imap_to_bp() above.  If tp is NULL, this is just a normal
 	 * brelse().  If we're within a transaction, then xfs_trans_brelse()
 	 * will only release the buffer if it is not dirty within the
 	 * transaction.  It will be OK to release the buffer in this case,
@@ -1355,7 +1313,8 @@ xfs_iunlink(
 		 * Here we put the head pointer into our next pointer,
 		 * and then we fall through to point the head at us.
 		 */
-		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0);
+		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
+				       0, 0);
 		if (error)
 			return error;
 
@@ -1429,16 +1388,16 @@ xfs_iunlink_remove(
 
 	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
 		/*
-		 * We're at the head of the list.  Get the inode's
-		 * on-disk buffer to see if there is anyone after us
-		 * on the list.  Only modify our next pointer if it
-		 * is not already NULLAGINO.  This saves us the overhead
-		 * of dealing with the buffer when there is no need to
-		 * change it.
+		 * We're at the head of the list.  Get the inode's on-disk
+		 * buffer to see if there is anyone after us on the list.
+		 * Only modify our next pointer if it is not already NULLAGINO.
+		 * This saves us the overhead of dealing with the buffer when
+		 * there is no need to change it.
 		 */
-		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0);
+		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
+				       0, 0);
 		if (error) {
-			xfs_warn(mp, "%s: xfs_itobp() returned error %d.",
+			xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.",
 				__func__, error);
 			return error;
 		}
@@ -1493,13 +1452,15 @@ xfs_iunlink_remove(
 			ASSERT(next_agino != NULLAGINO);
 			ASSERT(next_agino != 0);
 		}
+
 		/*
-		 * Now last_ibp points to the buffer previous to us on
-		 * the unlinked list.  Pull us from the list.
+		 * Now last_ibp points to the buffer previous to us on the
+		 * unlinked list.  Pull us from the list.
 		 */
-		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0);
+		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
+				       0, 0);
 		if (error) {
-			xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.",
+			xfs_warn(mp, "%s: xfs_imap_to_bp(2) returned error %d.",
 				__func__, error);
 			return error;
 		}
@@ -1749,7 +1710,8 @@ xfs_ifree(
 
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
-	error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0);
+	error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &dip, &ibp,
+			       0, 0);
 	if (error)
 		return error;
 
@@ -2428,7 +2390,7 @@ xfs_iflush(
 	/*
 	 * For stale inodes we cannot rely on the backing buffer remaining
 	 * stale in cache for the remaining life of the stale inode and so
-	 * xfs_itobp() below may give us a buffer that no longer contains
+	 * xfs_imap_to_bp() below may give us a buffer that no longer contains
 	 * inodes below. We have to check this after ensuring the inode is
 	 * unpinned so that it is safe to reclaim the stale inode after the
 	 * flush call.
@@ -2454,7 +2416,8 @@ xfs_iflush(
 	/*
 	 * Get the buffer containing the on-disk inode.
 	 */
-	error = xfs_itobp(mp, NULL, ip, &dip, &bp, XBF_TRYLOCK);
+	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
+			       0);
 	if (error || !bp) {
 		xfs_ifunlock(ip);
 		return error;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 1efff36..942fd7f 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -560,9 +560,9 @@ do { \
 int		xfs_inotobp(struct xfs_mount *, struct xfs_trans *,
 			    xfs_ino_t, struct xfs_dinode **,
 			    struct xfs_buf **, int *, uint);
-int		xfs_itobp(struct xfs_mount *, struct xfs_trans *,
-			  struct xfs_inode *, struct xfs_dinode **,
-			  struct xfs_buf **, uint);
+int		xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
+			       struct xfs_imap *, struct xfs_dinode **,
+			       struct xfs_buf **, uint, uint);
 int		xfs_iread(struct xfs_mount *, struct xfs_trans *,
 			  struct xfs_inode *, uint);
 void		xfs_dinode_to_disk(struct xfs_dinode *,
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index eff577a..01d10a6 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -555,7 +555,7 @@ xfs_bulkstat_single(
 
 	/*
 	 * note that requesting valid inode numbers which are not allocated
-	 * to inodes will most likely cause xfs_itobp to generate warning
+	 * to inodes will most likely cause xfs_imap_to_bp to generate warning
 	 * messages about bad magic numbers. This is ok. The fact that
 	 * the inode isn't actually an inode is handled by the
 	 * error check below. Done this way to make the usual case faster
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index a76ba88..5da3ace 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3106,7 +3106,7 @@ xlog_recover_process_one_iunlink(
 	/*
 	 * Get the on disk inode to find the next inode in the bucket.
 	 */
-	error = xfs_itobp(mp, NULL, ip, &dip, &ibp, 0);
+	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0, 0);
 	if (error)
 		goto fail_iput;
 
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 1e9ee06..e61fc15 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -712,8 +712,8 @@ restart:
 	 * Note that xfs_iflush will never block on the inode buffer lock, as
 	 * xfs_ifree_cluster() can lock the inode buffer before it locks the
 	 * ip->i_lock, and we are doing the exact opposite here.  As a result,
-	 * doing a blocking xfs_itobp() to get the cluster buffer would result
-	 * in an ABBA deadlock with xfs_ifree_cluster().
+	 * doing a blocking xfs_imap_to_bp() to get the cluster buffer would
+	 * result in an ABBA deadlock with xfs_ifree_cluster().
 	 *
 	 * As xfs_ifree_cluser() must gather all inodes that are active in the
 	 * cache to mark them stale, if we hit this case we don't actually want
-- 
cgit v0.10.2


From 129dbc9a2d93bab823e57fe47f53d098a0d350f3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 3 Jul 2012 12:21:51 -0400
Subject: xfs: remove xfs_inotobp

There is no need to keep this helper around, opencoding it in the only
caller is just as clear.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 34c985d..d48e406 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -216,47 +216,6 @@ xfs_imap_to_bp(
 }
 
 /*
- * This routine is called to map an inode number within a file
- * system to the buffer containing the on-disk version of the
- * inode.  It returns a pointer to the buffer containing the
- * on-disk inode in the bpp parameter, and in the dip parameter
- * it returns a pointer to the on-disk inode within that buffer.
- *
- * If a non-zero error is returned, then the contents of bpp and
- * dipp are undefined.
- *
- * Use xfs_imap() to determine the size and location of the
- * buffer to read from disk.
- */
-int
-xfs_inotobp(
-	xfs_mount_t	*mp,
-	xfs_trans_t	*tp,
-	xfs_ino_t	ino,
-	xfs_dinode_t	**dipp,
-	xfs_buf_t	**bpp,
-	int		*offset,
-	uint		imap_flags)
-{
-	struct xfs_imap	imap;
-	xfs_buf_t	*bp;
-	int		error;
-
-	imap.im_blkno = 0;
-	error = xfs_imap(mp, tp, ino, &imap, imap_flags);
-	if (error)
-		return error;
-
-	error = xfs_imap_to_bp(mp, tp, &imap, dipp, &bp, 0, imap_flags);
-	if (error)
-		return error;
-
-	*bpp = bp;
-	*offset = imap.im_boffset;
-	return 0;
-}
-
-/*
  * Move inode type and inode format specific information from the
  * on-disk inode to the in-core inode.  For fifos, devs, and sockets
  * this means set if_rdev to the proper value.  For files, directories,
@@ -1431,23 +1390,32 @@ xfs_iunlink_remove(
 		next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
 		last_ibp = NULL;
 		while (next_agino != agino) {
-			/*
-			 * If the last inode wasn't the one pointing to
-			 * us, then release its buffer since we're not
-			 * going to do anything with it.
-			 */
-			if (last_ibp != NULL) {
+			struct xfs_imap	imap;
+
+			if (last_ibp)
 				xfs_trans_brelse(tp, last_ibp);
-			}
+
+			imap.im_blkno = 0;
 			next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino);
-			error = xfs_inotobp(mp, tp, next_ino, &last_dip,
-					    &last_ibp, &last_offset, 0);
+
+			error = xfs_imap(mp, tp, next_ino, &imap, 0);
+			if (error) {
+				xfs_warn(mp,
+	"%s: xfs_imap returned error %d.",
+					 __func__, error);
+				return error;
+			}
+
+			error = xfs_imap_to_bp(mp, tp, &imap, &last_dip,
+					       &last_ibp, 0, 0);
 			if (error) {
 				xfs_warn(mp,
-					"%s: xfs_inotobp() returned error %d.",
+	"%s: xfs_imap_to_bp returned error %d.",
 					__func__, error);
 				return error;
 			}
+
+			last_offset = imap.im_boffset;
 			next_agino = be32_to_cpu(last_dip->di_next_unlinked);
 			ASSERT(next_agino != NULLAGINO);
 			ASSERT(next_agino != 0);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 942fd7f..c2e2da3 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -557,9 +557,6 @@ do { \
 #define XFS_IGET_UNTRUSTED	0x2
 #define XFS_IGET_DONTCACHE	0x4
 
-int		xfs_inotobp(struct xfs_mount *, struct xfs_trans *,
-			    xfs_ino_t, struct xfs_dinode **,
-			    struct xfs_buf **, int *, uint);
 int		xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
 			       struct xfs_imap *, struct xfs_dinode **,
 			       struct xfs_buf **, uint, uint);
-- 
cgit v0.10.2


From 0d882a360b9012bc7a7e921c935774c3fba1bfd9 Mon Sep 17 00:00:00 2001
From: Alain Renaud <arenaud@sgi.com>
Date: Tue, 22 May 2012 15:56:21 -0500
Subject: Prefix IO_XX flags with XFS_IO_XX to avoid namespace collision.

Add an XFS_ prefix to IO_DIRECT, IO_DELALLOC, IO_UNWRITTEN and
IO_OVERWRITE. This is to avoid namespace conflicts with other modules.

Signed-off-by: Alain Renaud <arenaud@sgi.com>
Reviewed-by: Rich Johnston <rjohnston@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 91d77ac..15052ff 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -179,7 +179,7 @@ xfs_finish_ioend(
 	if (atomic_dec_and_test(&ioend->io_remaining)) {
 		struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
 
-		if (ioend->io_type == IO_UNWRITTEN)
+		if (ioend->io_type == XFS_IO_UNWRITTEN)
 			queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
 		else if (ioend->io_append_trans)
 			queue_work(mp->m_data_workqueue, &ioend->io_work);
@@ -210,7 +210,7 @@ xfs_end_io(
 	 * For unwritten extents we need to issue transactions to convert a
 	 * range to normal written extens after the data I/O has finished.
 	 */
-	if (ioend->io_type == IO_UNWRITTEN) {
+	if (ioend->io_type == XFS_IO_UNWRITTEN) {
 		/*
 		 * For buffered I/O we never preallocate a transaction when
 		 * doing the unwritten extent conversion, but for direct I/O
@@ -312,7 +312,7 @@ xfs_map_blocks(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -XFS_ERROR(EIO);
 
-	if (type == IO_UNWRITTEN)
+	if (type == XFS_IO_UNWRITTEN)
 		bmapi_flags |= XFS_BMAPI_IGSTATE;
 
 	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
@@ -336,7 +336,7 @@ xfs_map_blocks(
 	if (error)
 		return -XFS_ERROR(error);
 
-	if (type == IO_DELALLOC &&
+	if (type == XFS_IO_DELALLOC &&
 	    (!nimaps || isnullstartblock(imap->br_startblock))) {
 		error = xfs_iomap_write_allocate(ip, offset, count, imap);
 		if (!error)
@@ -345,7 +345,7 @@ xfs_map_blocks(
 	}
 
 #ifdef DEBUG
-	if (type == IO_UNWRITTEN) {
+	if (type == XFS_IO_UNWRITTEN) {
 		ASSERT(nimaps);
 		ASSERT(imap->br_startblock != HOLESTARTBLOCK);
 		ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
@@ -634,11 +634,11 @@ xfs_check_page_type(
 		bh = head = page_buffers(page);
 		do {
 			if (buffer_unwritten(bh))
-				acceptable += (type == IO_UNWRITTEN);
+				acceptable += (type == XFS_IO_UNWRITTEN);
 			else if (buffer_delay(bh))
-				acceptable += (type == IO_DELALLOC);
+				acceptable += (type == XFS_IO_DELALLOC);
 			else if (buffer_dirty(bh) && buffer_mapped(bh))
-				acceptable += (type == IO_OVERWRITE);
+				acceptable += (type == XFS_IO_OVERWRITE);
 			else
 				break;
 		} while ((bh = bh->b_this_page) != head);
@@ -721,11 +721,11 @@ xfs_convert_page(
 		if (buffer_unwritten(bh) || buffer_delay(bh) ||
 		    buffer_mapped(bh)) {
 			if (buffer_unwritten(bh))
-				type = IO_UNWRITTEN;
+				type = XFS_IO_UNWRITTEN;
 			else if (buffer_delay(bh))
-				type = IO_DELALLOC;
+				type = XFS_IO_DELALLOC;
 			else
-				type = IO_OVERWRITE;
+				type = XFS_IO_OVERWRITE;
 
 			if (!xfs_imap_valid(inode, imap, offset)) {
 				done = 1;
@@ -733,7 +733,7 @@ xfs_convert_page(
 			}
 
 			lock_buffer(bh);
-			if (type != IO_OVERWRITE)
+			if (type != XFS_IO_OVERWRITE)
 				xfs_map_at_offset(inode, bh, imap, offset);
 			xfs_add_to_ioend(inode, bh, offset, type,
 					 ioendp, done);
@@ -831,7 +831,7 @@ xfs_aops_discard_page(
 	struct buffer_head	*bh, *head;
 	loff_t			offset = page_offset(page);
 
-	if (!xfs_check_page_type(page, IO_DELALLOC))
+	if (!xfs_check_page_type(page, XFS_IO_DELALLOC))
 		goto out_invalidate;
 
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -956,7 +956,7 @@ xfs_vm_writepage(
 
 	bh = head = page_buffers(page);
 	offset = page_offset(page);
-	type = IO_OVERWRITE;
+	type = XFS_IO_OVERWRITE;
 
 	if (wbc->sync_mode == WB_SYNC_NONE)
 		nonblocking = 1;
@@ -981,18 +981,18 @@ xfs_vm_writepage(
 		}
 
 		if (buffer_unwritten(bh)) {
-			if (type != IO_UNWRITTEN) {
-				type = IO_UNWRITTEN;
+			if (type != XFS_IO_UNWRITTEN) {
+				type = XFS_IO_UNWRITTEN;
 				imap_valid = 0;
 			}
 		} else if (buffer_delay(bh)) {
-			if (type != IO_DELALLOC) {
-				type = IO_DELALLOC;
+			if (type != XFS_IO_DELALLOC) {
+				type = XFS_IO_DELALLOC;
 				imap_valid = 0;
 			}
 		} else if (buffer_uptodate(bh)) {
-			if (type != IO_OVERWRITE) {
-				type = IO_OVERWRITE;
+			if (type != XFS_IO_OVERWRITE) {
+				type = XFS_IO_OVERWRITE;
 				imap_valid = 0;
 			}
 		} else {
@@ -1028,7 +1028,7 @@ xfs_vm_writepage(
 		}
 		if (imap_valid) {
 			lock_buffer(bh);
-			if (type != IO_OVERWRITE)
+			if (type != XFS_IO_OVERWRITE)
 				xfs_map_at_offset(inode, bh, &imap, offset);
 			xfs_add_to_ioend(inode, bh, offset, type, &ioend,
 					 new_ioend);
@@ -1069,7 +1069,7 @@ xfs_vm_writepage(
 		 * Reserve log space if we might write beyond the on-disk
 		 * inode size.
 		 */
-		if (ioend->io_type != IO_UNWRITTEN &&
+		if (ioend->io_type != XFS_IO_UNWRITTEN &&
 		    xfs_ioend_is_append(ioend)) {
 			err = xfs_setfilesize_trans_alloc(ioend);
 			if (err)
@@ -1366,7 +1366,7 @@ xfs_end_io_direct_write(
 	ioend->io_iocb = iocb;
 	ioend->io_result = ret;
 	if (private && size > 0)
-		ioend->io_type = IO_UNWRITTEN;
+		ioend->io_type = XFS_IO_UNWRITTEN;
 
 	if (is_async) {
 		ioend->io_isasync = 1;
@@ -1398,7 +1398,7 @@ xfs_vm_direct_IO(
 		 * and converts at least on unwritten extent we will cancel
 		 * the still clean transaction after the I/O has finished.
 		 */
-		iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT);
+		iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT);
 		if (offset + size > XFS_I(inode)->i_d.di_size) {
 			ret = xfs_setfilesize_trans_alloc(ioend);
 			if (ret)
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 84eafbc..c325abb 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -24,17 +24,17 @@ extern mempool_t *xfs_ioend_pool;
  * Types of I/O for bmap clustering and I/O completion tracking.
  */
 enum {
-	IO_DIRECT = 0,	/* special case for direct I/O ioends */
-	IO_DELALLOC,	/* mapping covers delalloc region */
-	IO_UNWRITTEN,	/* mapping covers allocated but uninitialized data */
-	IO_OVERWRITE,	/* mapping covers already allocated extent */
+	XFS_IO_DIRECT = 0,	/* special case for direct I/O ioends */
+	XFS_IO_DELALLOC,	/* covers delalloc region */
+	XFS_IO_UNWRITTEN,	/* covers allocated but uninitialized data */
+	XFS_IO_OVERWRITE,	/* covers already allocated extent */
 };
 
 #define XFS_IO_TYPES \
 	{ 0,			"" }, \
-	{ IO_DELALLOC,		"delalloc" }, \
-	{ IO_UNWRITTEN,		"unwritten" }, \
-	{ IO_OVERWRITE,		"overwrite" }
+	{ XFS_IO_DELALLOC,		"delalloc" }, \
+	{ XFS_IO_UNWRITTEN,		"unwritten" }, \
+	{ XFS_IO_OVERWRITE,		"overwrite" }
 
 /*
  * xfs_ioend struct manages large extent writes for XFS.
-- 
cgit v0.10.2


From 824c313139c2ce678011bf11c4823a0c99651c1f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 4 Jul 2012 10:54:45 -0400
Subject: xfs: remove xfs_ialloc_find_free

This function is entirely trivial and only has one caller, so remove it to
simplify the code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 177a21a..30b816d 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -609,13 +609,6 @@ xfs_ialloc_get_rec(
 /*
  * Visible inode allocation functions.
  */
-/*
- * Find a free (set) bit in the inode bitmask.
- */
-static inline int xfs_ialloc_find_free(xfs_inofree_t *fp)
-{
-	return xfs_lowbit64(*fp);
-}
 
 /*
  * Allocate an inode on disk.
@@ -995,7 +988,7 @@ newino:
 	}
 
 alloc_inode:
-	offset = xfs_ialloc_find_free(&rec.ir_free);
+	offset = xfs_lowbit64(rec.ir_free);
 	ASSERT(offset >= 0);
 	ASSERT(offset < XFS_INODES_PER_CHUNK);
 	ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
-- 
cgit v0.10.2


From f2ecc5e453134a13c3b2b0f2cac52ab2d5c540d7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 4 Jul 2012 10:54:46 -0400
Subject: xfs: split xfs_dialloc

Move the actual allocation once we have selected an allocation group into a
separate helper, and make xfs_dialloc a wrapper around it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Ben Myers <bpm@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 30b816d..a124b9f8 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -607,188 +607,35 @@ xfs_ialloc_get_rec(
 }
 
 /*
- * Visible inode allocation functions.
- */
-
-/*
- * Allocate an inode on disk.
- * Mode is used to tell whether the new inode will need space, and whether
- * it is a directory.
+ * Allocate an inode.
  *
- * The arguments IO_agbp and alloc_done are defined to work within
- * the constraint of one allocation per transaction.
- * xfs_dialloc() is designed to be called twice if it has to do an
- * allocation to make more free inodes.  On the first call,
- * IO_agbp should be set to NULL. If an inode is available,
- * i.e., xfs_dialloc() did not need to do an allocation, an inode
- * number is returned.  In this case, IO_agbp would be set to the
- * current ag_buf and alloc_done set to false.
- * If an allocation needed to be done, xfs_dialloc would return
- * the current ag_buf in IO_agbp and set alloc_done to true.
- * The caller should then commit the current transaction, allocate a new
- * transaction, and call xfs_dialloc() again, passing in the previous
- * value of IO_agbp.  IO_agbp should be held across the transactions.
- * Since the agbp is locked across the two calls, the second call is
- * guaranteed to have a free inode available.
- *
- * Once we successfully pick an inode its number is returned and the
- * on-disk data structures are updated.  The inode itself is not read
- * in, since doing so would break ordering constraints with xfs_reclaim.
+ * The caller selected an AG for us, and made sure that free inodes are
+ * available.
  */
-int
-xfs_dialloc(
-	xfs_trans_t	*tp,		/* transaction pointer */
-	xfs_ino_t	parent,		/* parent inode (directory) */
-	umode_t		mode,		/* mode bits for new inode */
-	int		okalloc,	/* ok to allocate more space */
-	xfs_buf_t	**IO_agbp,	/* in/out ag header's buffer */
-	boolean_t	*alloc_done,	/* true if we needed to replenish
-					   inode freelist */
-	xfs_ino_t	*inop)		/* inode number allocated */
+STATIC int
+xfs_dialloc_ag(
+	struct xfs_trans	*tp,
+	struct xfs_buf		*agbp,
+	xfs_ino_t		parent,
+	xfs_ino_t		*inop)
 {
-	xfs_agnumber_t	agcount;	/* number of allocation groups */
-	xfs_buf_t	*agbp;		/* allocation group header's buffer */
-	xfs_agnumber_t	agno;		/* allocation group number */
-	xfs_agi_t	*agi;		/* allocation group header structure */
-	xfs_btree_cur_t	*cur;		/* inode allocation btree cursor */
-	int		error;		/* error return value */
-	int		i;		/* result code */
-	int		ialloced;	/* inode allocation status */
-	int		noroom = 0;	/* no space for inode blk allocation */
-	xfs_ino_t	ino;		/* fs-relative inode to be returned */
-	/* REFERENCED */
-	int		j;		/* result code */
-	xfs_mount_t	*mp;		/* file system mount structure */
-	int		offset;		/* index of inode in chunk */
-	xfs_agino_t	pagino;		/* parent's AG relative inode # */
-	xfs_agnumber_t	pagno;		/* parent's AG number */
-	xfs_inobt_rec_incore_t rec;	/* inode allocation record */
-	xfs_agnumber_t	tagno;		/* testing allocation group number */
-	xfs_btree_cur_t	*tcur;		/* temp cursor */
-	xfs_inobt_rec_incore_t trec;	/* temp inode allocation record */
-	struct xfs_perag *pag;
-
-
-	if (*IO_agbp == NULL) {
-		/*
-		 * We do not have an agbp, so select an initial allocation
-		 * group for inode allocation.
-		 */
-		agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
-		/*
-		 * Couldn't find an allocation group satisfying the
-		 * criteria, give up.
-		 */
-		if (!agbp) {
-			*inop = NULLFSINO;
-			return 0;
-		}
-		agi = XFS_BUF_TO_AGI(agbp);
-		ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
-	} else {
-		/*
-		 * Continue where we left off before.  In this case, we
-		 * know that the allocation group has free inodes.
-		 */
-		agbp = *IO_agbp;
-		agi = XFS_BUF_TO_AGI(agbp);
-		ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
-		ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
-	}
-	mp = tp->t_mountp;
-	agcount = mp->m_sb.sb_agcount;
-	agno = be32_to_cpu(agi->agi_seqno);
-	tagno = agno;
-	pagno = XFS_INO_TO_AGNO(mp, parent);
-	pagino = XFS_INO_TO_AGINO(mp, parent);
-
-	/*
-	 * If we have already hit the ceiling of inode blocks then clear
-	 * okalloc so we scan all available agi structures for a free
-	 * inode.
-	 */
-
-	if (mp->m_maxicount &&
-	    mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
-		noroom = 1;
-		okalloc = 0;
-	}
+	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_agi		*agi = XFS_BUF_TO_AGI(agbp);
+	xfs_agnumber_t		agno = be32_to_cpu(agi->agi_seqno);
+	xfs_agnumber_t		pagno = XFS_INO_TO_AGNO(mp, parent);
+	xfs_agino_t		pagino = XFS_INO_TO_AGINO(mp, parent);
+	struct xfs_perag	*pag;
+	struct xfs_btree_cur	*cur, *tcur;
+	struct xfs_inobt_rec_incore rec, trec;
+	xfs_ino_t		ino;
+	int			error;
+	int			offset;
+	int			i, j;
 
-	/*
-	 * Loop until we find an allocation group that either has free inodes
-	 * or in which we can allocate some inodes.  Iterate through the
-	 * allocation groups upward, wrapping at the end.
-	 */
-	*alloc_done = B_FALSE;
-	while (!agi->agi_freecount) {
-		/*
-		 * Don't do anything if we're not supposed to allocate
-		 * any blocks, just go on to the next ag.
-		 */
-		if (okalloc) {
-			/*
-			 * Try to allocate some new inodes in the allocation
-			 * group.
-			 */
-			if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) {
-				xfs_trans_brelse(tp, agbp);
-				if (error == ENOSPC) {
-					*inop = NULLFSINO;
-					return 0;
-				} else
-					return error;
-			}
-			if (ialloced) {
-				/*
-				 * We successfully allocated some inodes, return
-				 * the current context to the caller so that it
-				 * can commit the current transaction and call
-				 * us again where we left off.
-				 */
-				ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
-				*alloc_done = B_TRUE;
-				*IO_agbp = agbp;
-				*inop = NULLFSINO;
-				return 0;
-			}
-		}
-		/*
-		 * If it failed, give up on this ag.
-		 */
-		xfs_trans_brelse(tp, agbp);
-		/*
-		 * Go on to the next ag: get its ag header.
-		 */
-nextag:
-		if (++tagno == agcount)
-			tagno = 0;
-		if (tagno == agno) {
-			*inop = NULLFSINO;
-			return noroom ? ENOSPC : 0;
-		}
-		pag = xfs_perag_get(mp, tagno);
-		if (pag->pagi_inodeok == 0) {
-			xfs_perag_put(pag);
-			goto nextag;
-		}
-		error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
-		xfs_perag_put(pag);
-		if (error)
-			goto nextag;
-		agi = XFS_BUF_TO_AGI(agbp);
-		ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
-	}
-	/*
-	 * Here with an allocation group that has a free inode.
-	 * Reset agno since we may have chosen a new ag in the
-	 * loop above.
-	 */
-	agno = tagno;
-	*IO_agbp = NULL;
 	pag = xfs_perag_get(mp, agno);
 
  restart_pagno:
-	cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
+	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
 	/*
 	 * If pagino is 0 (this is the root inode allocation) use newino.
 	 * This must work because we've just allocated some.
@@ -1021,6 +868,158 @@ error0:
 }
 
 /*
+ * Allocate an inode on disk.
+ *
+ * Mode is used to tell whether the new inode will need space, and whether it
+ * is a directory.
+ *
+ * This function is designed to be called twice if it has to do an allocation
+ * to make more free inodes.  On the first call, *IO_agbp should be set to NULL.
+ * If an inode is available without having to perform an allocation, an inode
+ * number is returned.  In this case, *IO_agbp would be NULL.  If an allocation
+ * needs to be done, xfs_dialloc would return the current AGI buffer in
+ * *IO_agbp.  The caller should then commit the current transaction, allocate a
+ * new transaction, and call xfs_dialloc() again, passing in the previous value
+ * of *IO_agbp.  IO_agbp should be held across the transactions. Since the AGI
+ * buffer is locked across the two calls, the second call is guaranteed to have
+ * a free inode available.
+ *
+ * Once we successfully pick an inode its number is returned and the on-disk
+ * data structures are updated.  The inode itself is not read in, since doing so
+ * would break ordering constraints with xfs_reclaim.
+ */
+int
+xfs_dialloc(
+	struct xfs_trans	*tp,
+	xfs_ino_t		parent,
+	umode_t			mode,
+	int			okalloc,
+	struct xfs_buf		**IO_agbp,
+	boolean_t		*alloc_done,
+	xfs_ino_t		*inop)
+{
+	struct xfs_buf		*agbp;
+	xfs_agnumber_t		agno;
+	struct xfs_agi		*agi;
+	int			error;
+	int			ialloced;
+	int			noroom = 0;
+	struct xfs_mount	*mp;
+	xfs_agnumber_t		tagno;
+	struct xfs_perag	*pag;
+
+	if (*IO_agbp == NULL) {
+		/*
+		 * We do not have an agbp, so select an initial allocation
+		 * group for inode allocation.
+		 */
+		agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
+		/*
+		 * Couldn't find an allocation group satisfying the
+		 * criteria, give up.
+		 */
+		if (!agbp) {
+			*inop = NULLFSINO;
+			return 0;
+		}
+		agi = XFS_BUF_TO_AGI(agbp);
+		ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
+	} else {
+		/*
+		 * Continue where we left off before.  In this case, we
+		 * know that the allocation group has free inodes.
+		 */
+		agbp = *IO_agbp;
+		agi = XFS_BUF_TO_AGI(agbp);
+		ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
+		ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
+	}
+	mp = tp->t_mountp;
+	agno = be32_to_cpu(agi->agi_seqno);
+	tagno = agno;
+
+	/*
+	 * If we have already hit the ceiling of inode blocks then clear
+	 * okalloc so we scan all available agi structures for a free
+	 * inode.
+	 */
+
+	if (mp->m_maxicount &&
+	    mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
+		noroom = 1;
+		okalloc = 0;
+	}
+
+	/*
+	 * Loop until we find an allocation group that either has free inodes
+	 * or in which we can allocate some inodes.  Iterate through the
+	 * allocation groups upward, wrapping at the end.
+	 */
+	*alloc_done = B_FALSE;
+	while (!agi->agi_freecount) {
+		/*
+		 * Don't do anything if we're not supposed to allocate
+		 * any blocks, just go on to the next ag.
+		 */
+		if (okalloc) {
+			/*
+			 * Try to allocate some new inodes in the allocation
+			 * group.
+			 */
+			if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) {
+				xfs_trans_brelse(tp, agbp);
+				if (error == ENOSPC) {
+					*inop = NULLFSINO;
+					return 0;
+				} else
+					return error;
+			}
+			if (ialloced) {
+				/*
+				 * We successfully allocated some inodes, return
+				 * the current context to the caller so that it
+				 * can commit the current transaction and call
+				 * us again where we left off.
+				 */
+				ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
+				*alloc_done = B_TRUE;
+				*IO_agbp = agbp;
+				*inop = NULLFSINO;
+				return 0;
+			}
+		}
+		/*
+		 * If it failed, give up on this ag.
+		 */
+		xfs_trans_brelse(tp, agbp);
+		/*
+		 * Go on to the next ag: get its ag header.
+		 */
+nextag:
+		if (++tagno == mp->m_sb.sb_agcount)
+			tagno = 0;
+		if (tagno == agno) {
+			*inop = NULLFSINO;
+			return noroom ? ENOSPC : 0;
+		}
+		pag = xfs_perag_get(mp, tagno);
+		if (pag->pagi_inodeok == 0) {
+			xfs_perag_put(pag);
+			goto nextag;
+		}
+		error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
+		xfs_perag_put(pag);
+		if (error)
+			goto nextag;
+		agi = XFS_BUF_TO_AGI(agbp);
+		ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
+	}
+
+	*IO_agbp = NULL;
+	return xfs_dialloc_ag(tp, agbp, parent, inop);
+}
+
+/*
  * Free disk inode.  Carefully avoids touching the incore inode, all
  * manipulations incore are the caller's responsibility.
  * The on-disk inode is not changed by this operation, only the
-- 
cgit v0.10.2


From 08358906ed78f6ab4d3ff8e4fd1b87b9a4aea645 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 4 Jul 2012 10:54:47 -0400
Subject: xfs: remove the alloc_done argument to xfs_dialloc

We can simply check the IO_agbp pointer for being non-NULL instead of
passing another argument through two layers of function calls.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index a124b9f8..2b70952 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -895,7 +895,6 @@ xfs_dialloc(
 	umode_t			mode,
 	int			okalloc,
 	struct xfs_buf		**IO_agbp,
-	boolean_t		*alloc_done,
 	xfs_ino_t		*inop)
 {
 	struct xfs_buf		*agbp;
@@ -955,7 +954,6 @@ xfs_dialloc(
 	 * or in which we can allocate some inodes.  Iterate through the
 	 * allocation groups upward, wrapping at the end.
 	 */
-	*alloc_done = B_FALSE;
 	while (!agi->agi_freecount) {
 		/*
 		 * Don't do anything if we're not supposed to allocate
@@ -982,7 +980,6 @@ xfs_dialloc(
 				 * us again where we left off.
 				 */
 				ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
-				*alloc_done = B_TRUE;
 				*IO_agbp = agbp;
 				*inop = NULLFSINO;
 				return 0;
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index 65ac57c..1fd6ea4 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -75,8 +75,6 @@ xfs_dialloc(
 	umode_t		mode,		/* mode bits for new inode */
 	int		okalloc,	/* ok to allocate more space */
 	struct xfs_buf	**agbp,		/* buf for a.g. inode header */
-	boolean_t	*alloc_done,	/* an allocation was done to replenish
-					   the free inodes */
 	xfs_ino_t	*inop);		/* inode number allocated */
 
 /*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d48e406..5c10825 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -887,7 +887,6 @@ xfs_ialloc(
 	prid_t		prid,
 	int		okalloc,
 	xfs_buf_t	**ialloc_context,
-	boolean_t	*call_again,
 	xfs_inode_t	**ipp)
 {
 	xfs_ino_t	ino;
@@ -902,10 +901,10 @@ xfs_ialloc(
 	 * the on-disk inode to be allocated.
 	 */
 	error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
-			    ialloc_context, call_again, &ino);
+			    ialloc_context, &ino);
 	if (error)
 		return error;
-	if (*call_again || ino == NULLFSINO) {
+	if (*ialloc_context || ino == NULLFSINO) {
 		*ipp = NULL;
 		return 0;
 	}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index c2e2da3..04d2fe4 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -517,7 +517,7 @@ void		xfs_inode_free(struct xfs_inode *ip);
  */
 int		xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t,
 			   xfs_nlink_t, xfs_dev_t, prid_t, int,
-			   struct xfs_buf **, boolean_t *, xfs_inode_t **);
+			   struct xfs_buf **, xfs_inode_t **);
 
 uint		xfs_ip2xflags(struct xfs_inode *);
 uint		xfs_dic2xflags(struct xfs_dinode *);
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 4e5b9ad..0025c78 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -65,7 +65,6 @@ xfs_dir_ialloc(
 	xfs_trans_t	*ntp;
 	xfs_inode_t	*ip;
 	xfs_buf_t	*ialloc_context = NULL;
-	boolean_t	call_again = B_FALSE;
 	int		code;
 	uint		log_res;
 	uint		log_count;
@@ -91,7 +90,7 @@ xfs_dir_ialloc(
 	 * the inode(s) that we've just allocated.
 	 */
 	code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
-			  &ialloc_context, &call_again, &ip);
+			  &ialloc_context, &ip);
 
 	/*
 	 * Return an error if we were unable to allocate a new inode.
@@ -102,19 +101,18 @@ xfs_dir_ialloc(
 		*ipp = NULL;
 		return code;
 	}
-	if (!call_again && (ip == NULL)) {
+	if (!ialloc_context && !ip) {
 		*ipp = NULL;
 		return XFS_ERROR(ENOSPC);
 	}
 
 	/*
-	 * If call_again is set, then we were unable to get an
+	 * If the AGI buffer is non-NULL, then we were unable to get an
 	 * inode in one operation.  We need to commit the current
 	 * transaction and call xfs_ialloc() again.  It is guaranteed
 	 * to succeed the second time.
 	 */
-	if (call_again) {
-
+	if (ialloc_context) {
 		/*
 		 * Normally, xfs_trans_commit releases all the locks.
 		 * We call bhold to hang on to the ialloc_context across
@@ -195,7 +193,7 @@ xfs_dir_ialloc(
 		 * this call should always succeed.
 		 */
 		code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
-				  okalloc, &ialloc_context, &call_again, &ip);
+				  okalloc, &ialloc_context, &ip);
 
 		/*
 		 * If we get an error at this point, return to the caller
@@ -206,12 +204,11 @@ xfs_dir_ialloc(
 			*ipp = NULL;
 			return code;
 		}
-		ASSERT ((!call_again) && (ip != NULL));
+		ASSERT(!ialloc_context && ip);
 
 	} else {
-		if (committed != NULL) {
+		if (committed != NULL)
 			*committed = 0;
-		}
 	}
 
 	*ipp = ip;
-- 
cgit v0.10.2


From 4bb61069d2019dea2a7e4e0f4432101f03a9b820 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 4 Jul 2012 10:54:48 -0400
Subject: xfs: add a short cut to xfs_dialloc for the non-NULL agbp case

In this case we already have selected an AG and know it has free space
because the buffer lock never got released.  Jump directly into xfs_dialloc_ag
and short cut the AG selection loop.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 2b70952..7aa8a02 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -634,6 +634,10 @@ xfs_dialloc_ag(
 
 	pag = xfs_perag_get(mp, agno);
 
+	ASSERT(pag->pagi_init);
+	ASSERT(pag->pagi_inodeok);
+	ASSERT(pag->pagi_freecount > 0);
+
  restart_pagno:
 	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
 	/*
@@ -907,32 +911,32 @@ xfs_dialloc(
 	xfs_agnumber_t		tagno;
 	struct xfs_perag	*pag;
 
-	if (*IO_agbp == NULL) {
-		/*
-		 * We do not have an agbp, so select an initial allocation
-		 * group for inode allocation.
-		 */
-		agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
+	if (*IO_agbp) {
 		/*
-		 * Couldn't find an allocation group satisfying the
-		 * criteria, give up.
-		 */
-		if (!agbp) {
-			*inop = NULLFSINO;
-			return 0;
-		}
-		agi = XFS_BUF_TO_AGI(agbp);
-		ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
-	} else {
-		/*
-		 * Continue where we left off before.  In this case, we
+		 * If the caller passes in a pointer to the AGI buffer,
+		 * continue where we left off before.  In this case, we
 		 * know that the allocation group has free inodes.
 		 */
 		agbp = *IO_agbp;
-		agi = XFS_BUF_TO_AGI(agbp);
-		ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
-		ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
+		goto out_alloc;
 	}
+
+	/*
+	 * We do not have an agbp, so select an initial allocation
+	 * group for inode allocation.
+	 */
+	agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
+
+	/*
+	 * Couldn't find an allocation group satisfying the
+	 * criteria, give up.
+	 */
+	if (!agbp) {
+		*inop = NULLFSINO;
+		return 0;
+	}
+	agi = XFS_BUF_TO_AGI(agbp);
+
 	mp = tp->t_mountp;
 	agno = be32_to_cpu(agi->agi_seqno);
 	tagno = agno;
@@ -1012,6 +1016,7 @@ nextag:
 		ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
 	}
 
+out_alloc:
 	*IO_agbp = NULL;
 	return xfs_dialloc_ag(tp, agbp, parent, inop);
 }
-- 
cgit v0.10.2


From 55d6af64cb8bf8c7e9a84b254d2c3479be8c067c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 4 Jul 2012 10:54:49 -0400
Subject: xfs: refactor xfs_ialloc_ag_select

Loop over the in-core perag structures and prefer using pagi_freecount over
going out to the AGI buffer where possible.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 7aa8a02..ecb9f22 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -442,14 +442,13 @@ xfs_ialloc_next_ag(
  * Select an allocation group to look for a free inode in, based on the parent
  * inode and then mode.  Return the allocation group buffer.
  */
-STATIC xfs_buf_t *			/* allocation group buffer */
+STATIC xfs_agnumber_t
 xfs_ialloc_ag_select(
 	xfs_trans_t	*tp,		/* transaction pointer */
 	xfs_ino_t	parent,		/* parent directory inode number */
 	umode_t		mode,		/* bits set to indicate file type */
 	int		okalloc)	/* ok to allocate more space */
 {
-	xfs_buf_t	*agbp;		/* allocation group header buffer */
 	xfs_agnumber_t	agcount;	/* number of ag's in the filesystem */
 	xfs_agnumber_t	agno;		/* current ag number */
 	int		flags;		/* alloc buffer locking flags */
@@ -459,6 +458,7 @@ xfs_ialloc_ag_select(
 	int		needspace;	/* file mode implies space allocated */
 	xfs_perag_t	*pag;		/* per allocation group data */
 	xfs_agnumber_t	pagno;		/* parent (starting) ag number */
+	int		error;
 
 	/*
 	 * Files of these types need at least one block if length > 0
@@ -474,7 +474,9 @@ xfs_ialloc_ag_select(
 		if (pagno >= agcount)
 			pagno = 0;
 	}
+
 	ASSERT(pagno < agcount);
+
 	/*
 	 * Loop through allocation groups, looking for one with a little
 	 * free space in it.  Note we don't look for free inodes, exactly.
@@ -486,51 +488,45 @@ xfs_ialloc_ag_select(
 	flags = XFS_ALLOC_FLAG_TRYLOCK;
 	for (;;) {
 		pag = xfs_perag_get(mp, agno);
+		if (!pag->pagi_inodeok) {
+			xfs_ialloc_next_ag(mp);
+			goto nextag;
+		}
+
 		if (!pag->pagi_init) {
-			if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
-				agbp = NULL;
+			error = xfs_ialloc_pagi_init(mp, tp, agno);
+			if (error)
 				goto nextag;
-			}
-		} else
-			agbp = NULL;
+		}
 
-		if (!pag->pagi_inodeok) {
-			xfs_ialloc_next_ag(mp);
-			goto unlock_nextag;
+		if (pag->pagi_freecount) {
+			xfs_perag_put(pag);
+			return agno;
 		}
 
-		/*
-		 * Is there enough free space for the file plus a block
-		 * of inodes (if we need to allocate some)?
-		 */
-		ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp);
-		if (ineed && !pag->pagf_init) {
-			if (agbp == NULL &&
-			    xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
-				agbp = NULL;
+		if (!okalloc)
+			goto nextag;
+
+		if (!pag->pagf_init) {
+			error = xfs_alloc_pagf_init(mp, tp, agno, flags);
+			if (error)
 				goto nextag;
-			}
-			(void)xfs_alloc_pagf_init(mp, tp, agno, flags);
 		}
-		if (!ineed || pag->pagf_init) {
-			if (ineed && !(longest = pag->pagf_longest))
-				longest = pag->pagf_flcount > 0;
-			if (!ineed ||
-			    (pag->pagf_freeblks >= needspace + ineed &&
-			     longest >= ineed &&
-			     okalloc)) {
-				if (agbp == NULL &&
-				    xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
-					agbp = NULL;
-					goto nextag;
-				}
-				xfs_perag_put(pag);
-				return agbp;
-			}
+
+		/*
+		 * Is there enough free space for the file plus a block of
+		 * inodes? (if we need to allocate some)?
+		 */
+		ineed = XFS_IALLOC_BLOCKS(mp);
+		longest = pag->pagf_longest;
+		if (!longest)
+			longest = pag->pagf_flcount > 0;
+
+		if (pag->pagf_freeblks >= needspace + ineed &&
+		    longest >= ineed) {
+			xfs_perag_put(pag);
+			return agno;
 		}
-unlock_nextag:
-		if (agbp)
-			xfs_trans_brelse(tp, agbp);
 nextag:
 		xfs_perag_put(pag);
 		/*
@@ -538,13 +534,13 @@ nextag:
 		 * down.
 		 */
 		if (XFS_FORCED_SHUTDOWN(mp))
-			return NULL;
+			return NULLAGNUMBER;
 		agno++;
 		if (agno >= agcount)
 			agno = 0;
 		if (agno == pagno) {
 			if (flags == 0)
-				return NULL;
+				return NULLAGNUMBER;
 			flags = 0;
 		}
 	}
@@ -901,13 +897,13 @@ xfs_dialloc(
 	struct xfs_buf		**IO_agbp,
 	xfs_ino_t		*inop)
 {
+	struct xfs_mount	*mp = tp->t_mountp;
 	struct xfs_buf		*agbp;
 	xfs_agnumber_t		agno;
 	struct xfs_agi		*agi;
 	int			error;
 	int			ialloced;
 	int			noroom = 0;
-	struct xfs_mount	*mp;
 	xfs_agnumber_t		tagno;
 	struct xfs_perag	*pag;
 
@@ -925,20 +921,17 @@ xfs_dialloc(
 	 * We do not have an agbp, so select an initial allocation
 	 * group for inode allocation.
 	 */
-	agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
-
-	/*
-	 * Couldn't find an allocation group satisfying the
-	 * criteria, give up.
-	 */
-	if (!agbp) {
+	agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
+	if (agno == NULLAGNUMBER) {
 		*inop = NULLFSINO;
 		return 0;
 	}
+
+	error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+	if (error)
+		return XFS_ERROR(error);
 	agi = XFS_BUF_TO_AGI(agbp);
 
-	mp = tp->t_mountp;
-	agno = be32_to_cpu(agi->agi_seqno);
 	tagno = agno;
 
 	/*
-- 
cgit v0.10.2


From be60fe54b216a62403b816d3930a66ad7c51cbc6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 4 Jul 2012 10:54:50 -0400
Subject: xfs: do not read the AGI buffer in xfs_dialloc until nessecary

Refactor the AG selection loop in xfs_dialloc to operate on the in-memory
perag data as much as possible.  We only read the AGI buffer once we have
selected an AG to allocate inodes now instead of for every AG considered.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index ecb9f22..21e37b5 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -900,11 +900,10 @@ xfs_dialloc(
 	struct xfs_mount	*mp = tp->t_mountp;
 	struct xfs_buf		*agbp;
 	xfs_agnumber_t		agno;
-	struct xfs_agi		*agi;
 	int			error;
 	int			ialloced;
 	int			noroom = 0;
-	xfs_agnumber_t		tagno;
+	xfs_agnumber_t		start_agno;
 	struct xfs_perag	*pag;
 
 	if (*IO_agbp) {
@@ -921,25 +920,17 @@ xfs_dialloc(
 	 * We do not have an agbp, so select an initial allocation
 	 * group for inode allocation.
 	 */
-	agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
-	if (agno == NULLAGNUMBER) {
+	start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
+	if (start_agno == NULLAGNUMBER) {
 		*inop = NULLFSINO;
 		return 0;
 	}
 
-	error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
-	if (error)
-		return XFS_ERROR(error);
-	agi = XFS_BUF_TO_AGI(agbp);
-
-	tagno = agno;
-
 	/*
 	 * If we have already hit the ceiling of inode blocks then clear
 	 * okalloc so we scan all available agi structures for a free
 	 * inode.
 	 */
-
 	if (mp->m_maxicount &&
 	    mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
 		noroom = 1;
@@ -951,67 +942,87 @@ xfs_dialloc(
 	 * or in which we can allocate some inodes.  Iterate through the
 	 * allocation groups upward, wrapping at the end.
 	 */
-	while (!agi->agi_freecount) {
-		/*
-		 * Don't do anything if we're not supposed to allocate
-		 * any blocks, just go on to the next ag.
-		 */
-		if (okalloc) {
-			/*
-			 * Try to allocate some new inodes in the allocation
-			 * group.
-			 */
-			if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) {
-				xfs_trans_brelse(tp, agbp);
-				if (error == ENOSPC) {
-					*inop = NULLFSINO;
-					return 0;
-				} else
-					return error;
-			}
-			if (ialloced) {
-				/*
-				 * We successfully allocated some inodes, return
-				 * the current context to the caller so that it
-				 * can commit the current transaction and call
-				 * us again where we left off.
-				 */
-				ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
-				*IO_agbp = agbp;
-				*inop = NULLFSINO;
-				return 0;
-			}
+	agno = start_agno;
+	for (;;) {
+		pag = xfs_perag_get(mp, agno);
+		if (!pag->pagi_inodeok) {
+			xfs_ialloc_next_ag(mp);
+			goto nextag;
+		}
+
+		if (!pag->pagi_init) {
+			error = xfs_ialloc_pagi_init(mp, tp, agno);
+			if (error)
+				goto out_error;
 		}
+
 		/*
-		 * If it failed, give up on this ag.
+		 * Do a first racy fast path check if this AG is usable.
 		 */
-		xfs_trans_brelse(tp, agbp);
+		if (!pag->pagi_freecount && !okalloc)
+			goto nextag;
+
+		error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+		if (error)
+			goto out_error;
+
 		/*
-		 * Go on to the next ag: get its ag header.
+		 * Once the AGI has been read in we have to recheck
+		 * pagi_freecount with the AGI buffer lock held.
 		 */
-nextag:
-		if (++tagno == mp->m_sb.sb_agcount)
-			tagno = 0;
-		if (tagno == agno) {
+		if (pag->pagi_freecount) {
+			xfs_perag_put(pag);
+			goto out_alloc;
+		}
+
+		if (!okalloc) {
+			xfs_trans_brelse(tp, agbp);
+			goto nextag;
+		}
+
+		error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced);
+		if (error) {
+			xfs_trans_brelse(tp, agbp);
+
+			if (error != ENOSPC)
+				goto out_error;
+
+			xfs_perag_put(pag);
 			*inop = NULLFSINO;
-			return noroom ? ENOSPC : 0;
+			return 0;
 		}
-		pag = xfs_perag_get(mp, tagno);
-		if (pag->pagi_inodeok == 0) {
+
+		if (ialloced) {
+			/*
+			 * We successfully allocated some inodes, return
+			 * the current context to the caller so that it
+			 * can commit the current transaction and call
+			 * us again where we left off.
+			 */
+			ASSERT(pag->pagi_freecount > 0);
 			xfs_perag_put(pag);
-			goto nextag;
+
+			*IO_agbp = agbp;
+			*inop = NULLFSINO;
+			return 0;
 		}
-		error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
+
+nextag:
 		xfs_perag_put(pag);
-		if (error)
-			goto nextag;
-		agi = XFS_BUF_TO_AGI(agbp);
-		ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
+		if (++agno == mp->m_sb.sb_agcount)
+			agno = 0;
+		if (agno == start_agno) {
+			*inop = NULLFSINO;
+			return noroom ? ENOSPC : 0;
+		}
 	}
 
 out_alloc:
 	*IO_agbp = NULL;
 	return xfs_dialloc_ag(tp, agbp, parent, inop);
+out_error:
+	xfs_perag_put(pag);
+	return XFS_ERROR(error);
 }
 
 /*
-- 
cgit v0.10.2


From b373e98daa70d7ddb10f53f81e711c4d17651795 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 4 Jul 2012 11:13:29 -0400
Subject: xfs: clean up xfs_inactive

The code to reserve log space and join the inode to the transaction is
common for all cases, so don't duplicate it.  Also remove the trivial
xfs_inactive_symlink_local helper, which can simply be open-coded now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Rich Johnston <rjohnston@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index c22f4e0..f9a5157 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -282,23 +282,15 @@ xfs_inactive_symlink_rmt(
 	 * free them all in one bunmapi call.
 	 */
 	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
-	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
-			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
-		ASSERT(XFS_FORCED_SHUTDOWN(mp));
-		xfs_trans_cancel(tp, 0);
-		*tpp = NULL;
-		return error;
-	}
+
 	/*
 	 * Lock the inode, fix the size, and join it to the transaction.
 	 * Hold it so in the normal path, we still have it locked for
 	 * the second transaction.  In the error paths we need it
 	 * held so the cancel won't rele it, see below.
 	 */
-	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 	size = (int)ip->i_d.di_size;
 	ip->i_d.di_size = 0;
-	xfs_trans_ijoin(tp, ip, 0);
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 	/*
 	 * Find the block(s) so we can inval and unmap them.
@@ -385,67 +377,15 @@ xfs_inactive_symlink_rmt(
 		ASSERT(XFS_FORCED_SHUTDOWN(mp));
 		goto error0;
 	}
-	/*
-	 * Return with the inode locked but not joined to the transaction.
-	 */
+
+	xfs_trans_ijoin(tp, ip, 0);
 	*tpp = tp;
 	return 0;
 
  error1:
 	xfs_bmap_cancel(&free_list);
  error0:
-	/*
-	 * Have to come here with the inode locked and either
-	 * (held and in the transaction) or (not in the transaction).
-	 * If the inode isn't held then cancel would iput it, but
-	 * that's wrong since this is inactive and the vnode ref
-	 * count is 0 already.
-	 * Cancel won't do anything to the inode if held, but it still
-	 * needs to be locked until the cancel is done, if it was
-	 * joined to the transaction.
-	 */
-	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
-	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
-	*tpp = NULL;
 	return error;
-
-}
-
-STATIC int
-xfs_inactive_symlink_local(
-	xfs_inode_t	*ip,
-	xfs_trans_t	**tpp)
-{
-	int		error;
-
-	ASSERT(ip->i_d.di_size <= XFS_IFORK_DSIZE(ip));
-	/*
-	 * We're freeing a symlink which fit into
-	 * the inode.  Just free the memory used
-	 * to hold the old symlink.
-	 */
-	error = xfs_trans_reserve(*tpp, 0,
-				  XFS_ITRUNCATE_LOG_RES(ip->i_mount),
-				  0, XFS_TRANS_PERM_LOG_RES,
-				  XFS_ITRUNCATE_LOG_COUNT);
-
-	if (error) {
-		xfs_trans_cancel(*tpp, 0);
-		*tpp = NULL;
-		return error;
-	}
-	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-
-	/*
-	 * Zero length symlinks _can_ exist.
-	 */
-	if (ip->i_df.if_bytes > 0) {
-		xfs_idata_realloc(ip,
-				  -(ip->i_df.if_bytes),
-				  XFS_DATA_FORK);
-		ASSERT(ip->i_df.if_bytes == 0);
-	}
-	return 0;
 }
 
 STATIC int
@@ -604,7 +544,7 @@ xfs_inactive(
 	xfs_trans_t	*tp;
 	xfs_mount_t	*mp;
 	int		error;
-	int		truncate;
+	int		truncate = 0;
 
 	/*
 	 * If the inode is already free, then there can be nothing
@@ -616,17 +556,6 @@ xfs_inactive(
 		return VN_INACTIVE_CACHE;
 	}
 
-	/*
-	 * Only do a truncate if it's a regular file with
-	 * some actual space in it.  It's OK to look at the
-	 * inode's fields without the lock because we're the
-	 * only one with a reference to the inode.
-	 */
-	truncate = ((ip->i_d.di_nlink == 0) &&
-	    ((ip->i_d.di_size != 0) || XFS_ISIZE(ip) != 0 ||
-	     (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
-	    S_ISREG(ip->i_d.di_mode));
-
 	mp = ip->i_mount;
 
 	error = 0;
@@ -650,72 +579,54 @@ xfs_inactive(
 		goto out;
 	}
 
-	ASSERT(ip->i_d.di_nlink == 0);
+	if (S_ISREG(ip->i_d.di_mode) &&
+	    (ip->i_d.di_size != 0 || XFS_ISIZE(ip) != 0 ||
+	     ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0))
+		truncate = 1;
 
 	error = xfs_qm_dqattach(ip, 0);
 	if (error)
 		return VN_INACTIVE_CACHE;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
-	if (truncate) {
-		xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-		error = xfs_trans_reserve(tp, 0,
-					  XFS_ITRUNCATE_LOG_RES(mp),
-					  0, XFS_TRANS_PERM_LOG_RES,
-					  XFS_ITRUNCATE_LOG_COUNT);
-		if (error) {
-			/* Don't call itruncate_cleanup */
-			ASSERT(XFS_FORCED_SHUTDOWN(mp));
-			xfs_trans_cancel(tp, 0);
-			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-			return VN_INACTIVE_CACHE;
-		}
+	error = xfs_trans_reserve(tp, 0,
+			(truncate || S_ISLNK(ip->i_d.di_mode)) ?
+				XFS_ITRUNCATE_LOG_RES(mp) :
+				XFS_IFREE_LOG_RES(mp),
+			0,
+			XFS_TRANS_PERM_LOG_RES,
+			XFS_ITRUNCATE_LOG_COUNT);
+	if (error) {
+		ASSERT(XFS_FORCED_SHUTDOWN(mp));
+		xfs_trans_cancel(tp, 0);
+		return VN_INACTIVE_CACHE;
+	}
 
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		xfs_trans_ijoin(tp, ip, 0);
+	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, 0);
 
+	if (S_ISLNK(ip->i_d.di_mode)) {
+		/*
+		 * Zero length symlinks _can_ exist.
+		 */
+		if (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) {
+			error = xfs_inactive_symlink_rmt(ip, &tp);
+			if (error)
+				goto out_cancel;
+		} else if (ip->i_df.if_bytes > 0) {
+			xfs_idata_realloc(ip, -(ip->i_df.if_bytes),
+					  XFS_DATA_FORK);
+			ASSERT(ip->i_df.if_bytes == 0);
+		}
+	} else if (truncate) {
 		ip->i_d.di_size = 0;
 		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
 		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
-		if (error) {
-			xfs_trans_cancel(tp,
-				XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
-			xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
-			return VN_INACTIVE_CACHE;
-		}
+		if (error)
+			goto out_cancel;
 
 		ASSERT(ip->i_d.di_nextents == 0);
-	} else if (S_ISLNK(ip->i_d.di_mode)) {
-
-		/*
-		 * If we get an error while cleaning up a
-		 * symlink we bail out.
-		 */
-		error = (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) ?
-			xfs_inactive_symlink_rmt(ip, &tp) :
-			xfs_inactive_symlink_local(ip, &tp);
-
-		if (error) {
-			ASSERT(tp == NULL);
-			return VN_INACTIVE_CACHE;
-		}
-
-		xfs_trans_ijoin(tp, ip, 0);
-	} else {
-		error = xfs_trans_reserve(tp, 0,
-					  XFS_IFREE_LOG_RES(mp),
-					  0, XFS_TRANS_PERM_LOG_RES,
-					  XFS_INACTIVE_LOG_COUNT);
-		if (error) {
-			ASSERT(XFS_FORCED_SHUTDOWN(mp));
-			xfs_trans_cancel(tp, 0);
-			return VN_INACTIVE_CACHE;
-		}
-
-		xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-		xfs_trans_ijoin(tp, ip, 0);
 	}
 
 	/*
@@ -781,7 +692,11 @@ xfs_inactive(
 	xfs_qm_dqdetach(ip);
 	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 
- out:
+out:
+	return VN_INACTIVE_CACHE;
+out_cancel:
+	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 	return VN_INACTIVE_CACHE;
 }
 
-- 
cgit v0.10.2


From fe67be036ff2f713b1c5f24dd4cdffae75bcb97a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 4 Jul 2012 11:13:30 -0400
Subject: xfs: remove xfs_inactive_attrs

Remove this helper as the code flow is a lot more obvious when it gets
merged into its only caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Rich Johnston <rjohnston@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index f9a5157..9a2ae8c 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -388,54 +388,6 @@ xfs_inactive_symlink_rmt(
 	return error;
 }
 
-STATIC int
-xfs_inactive_attrs(
-	xfs_inode_t	*ip,
-	xfs_trans_t	**tpp)
-{
-	xfs_trans_t	*tp;
-	int		error;
-	xfs_mount_t	*mp;
-
-	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
-	tp = *tpp;
-	mp = ip->i_mount;
-	ASSERT(ip->i_d.di_forkoff != 0);
-	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	if (error)
-		goto error_unlock;
-
-	error = xfs_attr_inactive(ip);
-	if (error)
-		goto error_unlock;
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
-	error = xfs_trans_reserve(tp, 0,
-				  XFS_IFREE_LOG_RES(mp),
-				  0, XFS_TRANS_PERM_LOG_RES,
-				  XFS_INACTIVE_LOG_COUNT);
-	if (error)
-		goto error_cancel;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, ip, 0);
-	xfs_idestroy_fork(ip, XFS_ATTR_FORK);
-
-	ASSERT(ip->i_d.di_anextents == 0);
-
-	*tpp = tp;
-	return 0;
-
-error_cancel:
-	ASSERT(XFS_FORCED_SHUTDOWN(mp));
-	xfs_trans_cancel(tp, 0);
-error_unlock:
-	*tpp = NULL;
-	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-	return error;
-}
-
 int
 xfs_release(
 	xfs_inode_t	*ip)
@@ -630,24 +582,40 @@ xfs_inactive(
 	}
 
 	/*
-	 * If there are attributes associated with the file
-	 * then blow them away now.  The code calls a routine
-	 * that recursively deconstructs the attribute fork.
-	 * We need to just commit the current transaction
+	 * If there are attributes associated with the file then blow them away
+	 * now.  The code calls a routine that recursively deconstructs the
+	 * attribute fork.  We need to just commit the current transaction
 	 * because we can't use it for xfs_attr_inactive().
 	 */
 	if (ip->i_d.di_anextents > 0) {
-		error = xfs_inactive_attrs(ip, &tp);
-		/*
-		 * If we got an error, the transaction is already
-		 * cancelled, and the inode is unlocked. Just get out.
-		 */
-		 if (error)
-			 return VN_INACTIVE_CACHE;
-	} else if (ip->i_afp) {
-		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
+		ASSERT(ip->i_d.di_forkoff != 0);
+
+		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		if (error)
+			goto error_unlock;
+
+		error = xfs_attr_inactive(ip);
+		if (error)
+			goto error_unlock;
+
+		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
+		error = xfs_trans_reserve(tp, 0,
+					  XFS_IFREE_LOG_RES(mp),
+					  0, XFS_TRANS_PERM_LOG_RES,
+					  XFS_INACTIVE_LOG_COUNT);
+		if (error)
+			goto error_cancel;
+
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		xfs_trans_ijoin(tp, ip, 0);
 	}
 
+	if (ip->i_afp)
+		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
+
+	ASSERT(ip->i_d.di_anextents == 0);
+
 	/*
 	 * Free the inode.
 	 */
@@ -698,6 +666,13 @@ out_cancel:
 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
 	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 	return VN_INACTIVE_CACHE;
+
+error_cancel:
+	ASSERT(XFS_FORCED_SHUTDOWN(mp));
+	xfs_trans_cancel(tp, 0);
+error_unlock:
+	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+	return VN_INACTIVE_CACHE;
 }
 
 /*
-- 
cgit v0.10.2


From 0b56185b0d64ef89dad1c85bb7403fa762cbe50d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 4 Jul 2012 11:13:31 -0400
Subject: xfs: do not take the iolock in xfs_inactive

An inode that enters xfs_inactive has been removed from all global
lists but the inode hash, and can't be recycled in xfs_iget before
it has been marked reclaimable.  Thus taking the iolock in here
is not necessary at all, and given the amount of lockdep false
positives it has triggered already I'd rather remove the locking.

The only change outside of xfs_inactive is relaxing an assert in
xfs_itruncate_extents.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Rich Johnston <rjohnston@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5c10825..2778258 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1123,7 +1123,9 @@ xfs_itruncate_extents(
 	int			error = 0;
 	int			done = 0;
 
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+	ASSERT(!atomic_read(&VFS_I(ip)->i_count) ||
+	       xfs_isilocked(ip, XFS_IOLOCK_EXCL));
 	ASSERT(new_size <= XFS_ISIZE(ip));
 	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
 	ASSERT(ip->i_itemp != NULL);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 9a2ae8c..7927043 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -554,7 +554,7 @@ xfs_inactive(
 		return VN_INACTIVE_CACHE;
 	}
 
-	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, 0);
 
 	if (S_ISLNK(ip->i_d.di_mode)) {
@@ -591,21 +591,24 @@ xfs_inactive(
 		ASSERT(ip->i_d.di_forkoff != 0);
 
 		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		if (error)
-			goto error_unlock;
+			goto out_unlock;
+
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
 		error = xfs_attr_inactive(ip);
 		if (error)
-			goto error_unlock;
+			goto out;
 
 		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
 		error = xfs_trans_reserve(tp, 0,
 					  XFS_IFREE_LOG_RES(mp),
 					  0, XFS_TRANS_PERM_LOG_RES,
 					  XFS_INACTIVE_LOG_COUNT);
-		if (error)
-			goto error_cancel;
+		if (error) {
+			xfs_trans_cancel(tp, 0);
+			goto out;
+		}
 
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		xfs_trans_ijoin(tp, ip, 0);
@@ -658,21 +661,13 @@ xfs_inactive(
 	 * Release the dquots held by inode, if any.
 	 */
 	xfs_qm_dqdetach(ip);
-	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
-
+out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 out:
 	return VN_INACTIVE_CACHE;
 out_cancel:
 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
-	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
-	return VN_INACTIVE_CACHE;
-
-error_cancel:
-	ASSERT(XFS_FORCED_SHUTDOWN(mp));
-	xfs_trans_cancel(tp, 0);
-error_unlock:
-	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-	return VN_INACTIVE_CACHE;
+	goto out_unlock;
 }
 
 /*
-- 
cgit v0.10.2


From 5a15322da1a51ad8f3af1962de355885b6c606f2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 4 Jul 2012 11:13:32 -0400
Subject: xfs: avoid the iolock in xfs_free_eofblocks for evicted inodes

Same rationale as the last patch - these inodes are not reachable, so
don't bother with locking.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Rich Johnston <rjohnston@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 7927043..2a5c6373 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -146,11 +146,6 @@ xfs_readlink(
 }
 
 /*
- * Flags for xfs_free_eofblocks
- */
-#define XFS_FREE_EOF_TRYLOCK	(1<<0)
-
-/*
  * This is called by xfs_inactive to free any blocks beyond eof
  * when the link count isn't zero and by xfs_dm_punch_hole() when
  * punching a hole to EOF.
@@ -159,7 +154,7 @@ STATIC int
 xfs_free_eofblocks(
 	xfs_mount_t	*mp,
 	xfs_inode_t	*ip,
-	int		flags)
+	bool		need_iolock)
 {
 	xfs_trans_t	*tp;
 	int		error;
@@ -201,13 +196,11 @@ xfs_free_eofblocks(
 		 */
 		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
 
-		if (flags & XFS_FREE_EOF_TRYLOCK) {
+		if (need_iolock) {
 			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
 				xfs_trans_cancel(tp, 0);
 				return 0;
 			}
-		} else {
-			xfs_ilock(ip, XFS_IOLOCK_EXCL);
 		}
 
 		error = xfs_trans_reserve(tp, 0,
@@ -217,7 +210,8 @@ xfs_free_eofblocks(
 		if (error) {
 			ASSERT(XFS_FORCED_SHUTDOWN(mp));
 			xfs_trans_cancel(tp, 0);
-			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+			if (need_iolock)
+				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 			return error;
 		}
 
@@ -244,7 +238,10 @@ xfs_free_eofblocks(
 			error = xfs_trans_commit(tp,
 						XFS_TRANS_RELEASE_LOG_RES);
 		}
-		xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
+
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		if (need_iolock)
+			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 	}
 	return error;
 }
@@ -466,8 +463,7 @@ xfs_release(
 		if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
 			return 0;
 
-		error = xfs_free_eofblocks(mp, ip,
-					   XFS_FREE_EOF_TRYLOCK);
+		error = xfs_free_eofblocks(mp, ip, true);
 		if (error)
 			return error;
 
@@ -524,7 +520,7 @@ xfs_inactive(
 		    (!(ip->i_d.di_flags &
 				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
 		     ip->i_delayed_blks != 0))) {
-			error = xfs_free_eofblocks(mp, ip, 0);
+			error = xfs_free_eofblocks(mp, ip, false);
 			if (error)
 				return VN_INACTIVE_CACHE;
 		}
-- 
cgit v0.10.2


From 4f59af758f9092bc7b266ca919ce6067170e5172 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 4 Jul 2012 11:13:33 -0400
Subject: xfs: remove iolock lock classes

Content-Disposition: inline; filename=xfs-remove-iolock-classes

Now that we never take the iolock during inode reclaim we don't need
to play games with lock classes.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Rich Johnston <rjohnston@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 1bb4365..784a803 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -41,17 +41,6 @@
 
 
 /*
- * Define xfs inode iolock lockdep classes. We need to ensure that all active
- * inodes are considered the same for lockdep purposes, including inodes that
- * are recycled through the XFS_IRECLAIMABLE state. This is the the only way to
- * guarantee the locks are considered the same when there are multiple lock
- * initialisation siteѕ. Also, define a reclaimable inode class so it is
- * obvious in lockdep reports which class the report is against.
- */
-static struct lock_class_key xfs_iolock_active;
-struct lock_class_key xfs_iolock_reclaimable;
-
-/*
  * Allocate and initialise an xfs_inode.
  */
 STATIC struct xfs_inode *
@@ -80,8 +69,6 @@ xfs_inode_alloc(
 	ASSERT(ip->i_ino == 0);
 
 	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
-	lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
-			&xfs_iolock_active, "xfs_iolock_active");
 
 	/* initialise the xfs inode */
 	ip->i_ino = ino;
@@ -250,8 +237,6 @@ xfs_iget_cache_hit(
 
 		ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
 		mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
-		lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
-				&xfs_iolock_active, "xfs_iolock_active");
 
 		spin_unlock(&ip->i_flags_lock);
 		spin_unlock(&pag->pag_ici_lock);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 04d2fe4..94b32f9 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -487,8 +487,6 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
 #define XFS_IOLOCK_DEP(flags)	(((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
 #define XFS_ILOCK_DEP(flags)	(((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
 
-extern struct lock_class_key xfs_iolock_reclaimable;
-
 /*
  * For multiple groups support: if S_ISGID bit is set in the parent
  * directory, group of new file is set to that of the parent, and
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index cb2deb1..bdaf4cb 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -874,6 +874,8 @@ xfs_fs_evict_inode(
 {
 	xfs_inode_t		*ip = XFS_I(inode);
 
+	ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+
 	trace_xfs_evict_inode(ip);
 
 	truncate_inode_pages(&inode->i_data, 0);
@@ -882,22 +884,6 @@ xfs_fs_evict_inode(
 	XFS_STATS_INC(vn_remove);
 	XFS_STATS_DEC(vn_active);
 
-	/*
-	 * The iolock is used by the file system to coordinate reads,
-	 * writes, and block truncates.  Up to this point the lock
-	 * protected concurrent accesses by users of the inode.  But
-	 * from here forward we're doing some final processing of the
-	 * inode because we're done with it, and although we reuse the
-	 * iolock for protection it is really a distinct lock class
-	 * (in the lockdep sense) from before.  To keep lockdep happy
-	 * (and basically indicate what we are doing), we explicitly
-	 * re-init the iolock here.
-	 */
-	ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
-	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
-	lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
-			&xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
-
 	xfs_inactive(ip);
 }
 
-- 
cgit v0.10.2


From 8375f922aaa6e7a880022529202fb486315568c3 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Thu, 28 Jun 2012 06:52:56 -0400
Subject: xfs: re-enable xfsaild idle mode and fix associated races

xfsaild idle mode logic currently leads to a couple hangs:

1.) If xfsaild is rescheduled in during an incremental scan
    (i.e., tout != 0) and the target has been updated since
    the previous run, we can hit the new target and go into
    idle mode with a still populated ail.
2.) A wake up is only issued when the target is pushed forward.
    The wake up can race with xfsaild if it is currently in the
    process of entering idle mode, causing future wake up
    events to be lost.

These hangs have been reproduced and verified as fixed by
running xfstests 273 in a loop on a slightly modified upstream
kernel. The kernel is modified to re-enable idle mode as
previously implemented (when count == 0) and with a revert of
commit 670ce93f, which includes performance improvements that
make this harder to reproduce.

The solution, the algorithm for which has been outlined by
Dave Chinner, is to modify xfsaild to enter idle mode only when
the ail is empty and the push target has not been moved forward
since the last push.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 9c51448..6011ee6 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -383,6 +383,12 @@ xfsaild_push(
 	}
 
 	spin_lock(&ailp->xa_lock);
+
+	/* barrier matches the xa_target update in xfs_ail_push() */
+	smp_rmb();
+	target = ailp->xa_target;
+	ailp->xa_target_prev = target;
+
 	lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn);
 	if (!lip) {
 		/*
@@ -397,7 +403,6 @@ xfsaild_push(
 	XFS_STATS_INC(xs_push_ail);
 
 	lsn = lip->li_lsn;
-	target = ailp->xa_target;
 	while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) {
 		int	lock_result;
 
@@ -527,8 +532,32 @@ xfsaild(
 			__set_current_state(TASK_KILLABLE);
 		else
 			__set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(tout ?
-				 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
+
+		spin_lock(&ailp->xa_lock);
+
+		/*
+		 * Idle if the AIL is empty and we are not racing with a target
+		 * update. We check the AIL after we set the task to a sleep
+		 * state to guarantee that we either catch an xa_target update
+		 * or that a wake_up resets the state to TASK_RUNNING.
+		 * Otherwise, we run the risk of sleeping indefinitely.
+		 *
+		 * The barrier matches the xa_target update in xfs_ail_push().
+		 */
+		smp_rmb();
+		if (!xfs_ail_min(ailp) &&
+		    ailp->xa_target == ailp->xa_target_prev) {
+			spin_unlock(&ailp->xa_lock);
+			schedule();
+			tout = 0;
+			continue;
+		}
+		spin_unlock(&ailp->xa_lock);
+
+		if (tout)
+			schedule_timeout(msecs_to_jiffies(tout));
+
+		__set_current_state(TASK_RUNNING);
 
 		try_to_freeze();
 
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index fb62377..53b7c9b 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -67,6 +67,7 @@ struct xfs_ail {
 	struct task_struct	*xa_task;
 	struct list_head	xa_ail;
 	xfs_lsn_t		xa_target;
+	xfs_lsn_t		xa_target_prev;
 	struct list_head	xa_cursors;
 	spinlock_t		xa_lock;
 	xfs_lsn_t		xa_last_pushed_lsn;
-- 
cgit v0.10.2


From 9a57fa8ee7c29e11c2a29ce058573ba99157eda7 Mon Sep 17 00:00:00 2001
From: Mark Tinguely <tinguely@sgi.com>
Date: Tue, 24 Jul 2012 10:59:19 -0500
Subject: xfs: wait for the write the superblock on unmount

v2: Add the xfs_buf_lock to xfs_quiesce_attr().
    Add explanation of why xfs_buf_lock() is used to wait for the write.

xfs_wait_buftarg() does not wait for the completion of the write of the
uncached superblock. This write can race with the shutdown of the log
and causes a panic if the write does not win the race.

During the log write, xfsaild_push() will lock the buffer and set the
XBF_ASYNC flag. Because the XBF_ASYNC flag is set, complete() is not
performed on the buffer's iowait entry, so we cannot call
xfs_buf_iowait() to wait for the write to complete. The buffer's lock is
held until the write is complete, so we can block on an xfs_buf_lock()
request to be notified that the write is complete.

Signed-off-by: Mark Tinguely <tinguely@sgi.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 9536fd1..711ca51 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1529,6 +1529,15 @@ xfs_unmountfs(
 	xfs_ail_push_all_sync(mp->m_ail);
 	xfs_wait_buftarg(mp->m_ddev_targp);
 
+	/*
+	 * The superblock buffer is uncached and xfsaild_push() will lock and
+	 * set the XBF_ASYNC flag on the buffer. We cannot do xfs_buf_iowait()
+	 * here but a lock on the superblock buffer will block until iodone()
+	 * has completed.
+	 */
+	xfs_buf_lock(mp->m_sb_bp);
+	xfs_buf_unlock(mp->m_sb_bp);
+
 	xfs_log_unmount_write(mp);
 	xfs_log_unmount(mp);
 	xfs_uuid_unmount(mp);
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index e61fc15..97304f1 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -359,6 +359,15 @@ xfs_quiesce_attr(
 	 * added an item to the AIL, thus flush it again.
 	 */
 	xfs_ail_push_all_sync(mp->m_ail);
+
+	/*
+	 * The superblock buffer is uncached and xfsaild_push() will lock and
+	 * set the XBF_ASYNC flag on the buffer. We cannot do xfs_buf_iowait()
+	 * here but a lock on the superblock buffer will block until iodone()
+	 * has completed.
+	 */
+	xfs_buf_lock(mp->m_sb_bp);
+	xfs_buf_unlock(mp->m_sb_bp);
 }
 
 static void
-- 
cgit v0.10.2