From b1d6cc02f2f6a590c4d8dc2c3bcf7be3b9419945 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 2 Oct 2014 09:17:58 +1000
Subject: xfs: compat_xfs_bstat does not have forkoff

struct compat_xfs_bstat is missing the bs_forkoff field and so does
not fully translate the structure correctly. Fix it.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>

diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index a554646..94ce027 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -160,6 +160,7 @@ xfs_ioctl32_bstat_copyin(
 	    get_user(bstat->bs_gen,	&bstat32->bs_gen)	||
 	    get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) ||
 	    get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) ||
+	    get_user(bstat->bs_forkoff,	&bstat32->bs_forkoff)	||
 	    get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask)	||
 	    get_user(bstat->bs_dmstate,	&bstat32->bs_dmstate)	||
 	    get_user(bstat->bs_aextents, &bstat32->bs_aextents))
@@ -214,6 +215,7 @@ xfs_bulkstat_one_fmt_compat(
 	    put_user(buffer->bs_gen,	  &p32->bs_gen)		||
 	    put_user(buffer->bs_projid,	  &p32->bs_projid)	||
 	    put_user(buffer->bs_projid_hi,	&p32->bs_projid_hi)	||
+	    put_user(buffer->bs_forkoff,  &p32->bs_forkoff)	||
 	    put_user(buffer->bs_dmevmask, &p32->bs_dmevmask)	||
 	    put_user(buffer->bs_dmstate,  &p32->bs_dmstate)	||
 	    put_user(buffer->bs_aextents, &p32->bs_aextents))
diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h
index 80f4060..b1bb454 100644
--- a/fs/xfs/xfs_ioctl32.h
+++ b/fs/xfs/xfs_ioctl32.h
@@ -67,8 +67,9 @@ typedef struct compat_xfs_bstat {
 	__u32		bs_gen;		/* generation count		*/
 	__u16		bs_projid_lo;	/* lower part of project id	*/
 #define	bs_projid	bs_projid_lo	/* (previously just bs_projid)	*/
+	__u16		bs_forkoff;	/* inode fork offset in bytes	*/
 	__u16		bs_projid_hi;	/* high part of project id	*/
-	unsigned char	bs_pad[12];	/* pad space, unused		*/
+	unsigned char	bs_pad[10];	/* pad space, unused		*/
 	__u32		bs_dmevmask;	/* DMIG event mask		*/
 	__u16		bs_dmstate;	/* DMIG state info		*/
 	__u16		bs_aextents;	/* attribute number of extents	*/
-- 
cgit v0.10.2


From e076b0f3a5c472e77c0a0e163188f2761e8b4fed Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 2 Oct 2014 09:18:13 +1000
Subject: xfs: kill time.h

The typedef for timespecs and nanotime() are completely unnecessary,
and delay() can be moved to fs/xfs/xfs_linux.h, which means this file
can go away.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>

diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 844e288..53e95b2 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -21,7 +21,6 @@
 #include <linux/swap.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
-#include "time.h"
 #include "kmem.h"
 #include "xfs_message.h"
 
diff --git a/fs/xfs/time.h b/fs/xfs/time.h
deleted file mode 100644
index 387e695..0000000
--- a/fs/xfs/time.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_TIME_H__
-#define __XFS_SUPPORT_TIME_H__
-
-#include <linux/sched.h>
-#include <linux/time.h>
-
-typedef struct timespec timespec_t;
-
-static inline void delay(long ticks)
-{
-	schedule_timeout_uninterruptible(ticks);
-}
-
-static inline void nanotime(struct timespec *tvp)
-{
-	*tvp = CURRENT_TIME;
-}
-
-#endif /* __XFS_SUPPORT_TIME_H__ */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index c92cb48..4c130ff 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -654,7 +654,7 @@ xfs_ialloc(
 	xfs_inode_t	*ip;
 	uint		flags;
 	int		error;
-	timespec_t	tv;
+	struct timespec	tv;
 
 	/*
 	 * Call the space management code to pick
@@ -720,7 +720,7 @@ xfs_ialloc(
 	ip->i_d.di_nextents = 0;
 	ASSERT(ip->i_d.di_nblocks == 0);
 
-	nanotime(&tv);
+	tv = current_fs_time(mp->m_super);
 	ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
 	ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
 	ip->i_d.di_atime = ip->i_d.di_mtime;
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index d10dc8f..6a51619 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -56,7 +56,6 @@ typedef __uint64_t __psunsigned_t;
 
 #include "kmem.h"
 #include "mrlock.h"
-#include "time.h"
 #include "uuid.h"
 
 #include <linux/semaphore.h>
@@ -179,6 +178,11 @@ typedef __uint64_t __psunsigned_t;
 #define MAX(a,b)	(max(a,b))
 #define howmany(x, y)	(((x)+((y)-1))/(y))
 
+static inline void delay(long ticks)
+{
+	schedule_timeout_uninterruptible(ticks);
+}
+
 /*
  * XFS wrapper structure for sysfs support. It depends on external data
  * structures and is embedded in various internal data structures to implement
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 50c3f56..cdb4d86 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -70,7 +70,7 @@ xfs_trans_ichgtime(
 	int			flags)
 {
 	struct inode		*inode = VFS_I(ip);
-	timespec_t		tv;
+	struct timespec		tv;
 
 	ASSERT(tp);
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-- 
cgit v0.10.2


From 9336e3a765b68d4a7fdd8256f393ebce95ecb0a7 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 2 Oct 2014 09:18:40 +1000
Subject: xfs: project id inheritance is a directory only flag

xfs_set_diflags() allows it to be set on non-directory inodes, and
this flags errors in xfs_repair. Further, inode allocation allows
the same directory-only flag to be inherited to non-directories.
Make sure directory inode flags don't appear on other types of
inodes.

This fixes several xfstests scratch filesystem corruption reports
(e.g. xfs/050) now that xfstests checks scratch filesystems after
test completion.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 4c130ff..2f63742 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -769,6 +769,8 @@ xfs_ialloc(
 					di_flags |= XFS_DIFLAG_EXTSZINHERIT;
 					ip->i_d.di_extsize = pip->i_d.di_extsize;
 				}
+				if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+					di_flags |= XFS_DIFLAG_PROJINHERIT;
 			} else if (S_ISREG(mode)) {
 				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
 					di_flags |= XFS_DIFLAG_REALTIME;
@@ -789,8 +791,6 @@ xfs_ialloc(
 			if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
 			    xfs_inherit_nosymlinks)
 				di_flags |= XFS_DIFLAG_NOSYMLINKS;
-			if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
-				di_flags |= XFS_DIFLAG_PROJINHERIT;
 			if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
 			    xfs_inherit_nodefrag)
 				di_flags |= XFS_DIFLAG_NODEFRAG;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 3799695..05a1955 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -968,8 +968,6 @@ xfs_set_diflags(
 		di_flags |= XFS_DIFLAG_NOATIME;
 	if (xflags & XFS_XFLAG_NODUMP)
 		di_flags |= XFS_DIFLAG_NODUMP;
-	if (xflags & XFS_XFLAG_PROJINHERIT)
-		di_flags |= XFS_DIFLAG_PROJINHERIT;
 	if (xflags & XFS_XFLAG_NODEFRAG)
 		di_flags |= XFS_DIFLAG_NODEFRAG;
 	if (xflags & XFS_XFLAG_FILESTREAM)
@@ -981,6 +979,8 @@ xfs_set_diflags(
 			di_flags |= XFS_DIFLAG_NOSYMLINKS;
 		if (xflags & XFS_XFLAG_EXTSZINHERIT)
 			di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+		if (xflags & XFS_XFLAG_PROJINHERIT)
+			di_flags |= XFS_DIFLAG_PROJINHERIT;
 	} else if (S_ISREG(ip->i_d.di_mode)) {
 		if (xflags & XFS_XFLAG_REALTIME)
 			di_flags |= XFS_DIFLAG_REALTIME;
-- 
cgit v0.10.2


From a872703f34cd6033d0b174fa598f63f1a57145bb Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 2 Oct 2014 09:20:30 +1000
Subject: xfs: only set extent size hint when asked

Currently the extent size hint is set unconditionally in
xfs_ioctl_setattr() when the FSX_EXTSIZE flag is set. Hence we can
set hints when the inode flags indicating the hint should be used
are not set.  Hence only set the extent size hint from userspace
when the inode has the XFS_DIFLAG_EXTSIZE flag set to indicate that
we should have an extent size hint set on the inode.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>

diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 05a1955..d6afc9f 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1231,13 +1231,25 @@ xfs_ioctl_setattr(
 
 	}
 
-	if (mask & FSX_EXTSIZE)
-		ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
 	if (mask & FSX_XFLAGS) {
 		xfs_set_diflags(ip, fa->fsx_xflags);
 		xfs_diflags_to_linux(ip);
 	}
 
+	/*
+	 * Only set the extent size hint if we've already determined that the
+	 * extent size hint should be set on the inode. If no extent size flags
+	 * are set on the inode then unconditionally clear the extent size hint.
+	 */
+	if (mask & FSX_EXTSIZE) {
+		int	extsize = 0;
+
+		if (ip->i_d.di_flags &
+				(XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT))
+			extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
+		ip->i_d.di_extsize = extsize;
+	}
+
 	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
-- 
cgit v0.10.2


From ce57bcf6b81caf1e9f780e98e8d23d3555746d74 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Thu, 2 Oct 2014 09:21:53 +1000
Subject: xfs: check for inode size overflow in xfs_new_eof()

If we write to the maximum file offset (2^63-2), XFS fails to log the
inode size update when the page is flushed. For example:

$ xfs_io -fc "pwrite `echo "2^63-1-1" | bc` 1" /mnt/file
wrote 1/1 bytes at offset 9223372036854775806
1.000000 bytes, 1 ops; 0.0000 sec (22.711 KiB/sec and 23255.8140 ops/sec)
$ stat -c %s /mnt/file
9223372036854775807
$ umount /mnt ; mount <dev> /mnt/
$ stat -c %s /mnt/file
0

This occurs because XFS calculates the new file size as io_offset +
io_size, I/O occurs in block sized requests, and the maximum supported
file size is not block aligned. Therefore, a write to the max allowable
offset on a 4k blocksize fs results in a write of size 4k to offset
2^63-4096 (e.g., equivalent to round_down(2^63-1, 4096), or IOW the
offset of the block that contains the max file size). The offset plus
size calculation (2^63 - 4096 + 4096 == 2^63) overflows the signed
64-bit variable which goes negative and causes the > comparison to the
on-disk inode size to fail. This returns 0 from xfs_new_eof() and
results in no change to the inode on-disk.

Update xfs_new_eof() to explicitly detect overflow of the local
calculation and use the VFS inode size in this scenario. The VFS inode
size is capped to the maximum and thus XFS writes the correct inode size
to disk.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>

diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index c10e3fa..9af2882 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -102,7 +102,7 @@ xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
 {
 	xfs_fsize_t i_size = i_size_read(VFS_I(ip));
 
-	if (new_size > i_size)
+	if (new_size > i_size || new_size < 0)
 		new_size = i_size;
 	return new_size > ip->i_d.di_size ? new_size : 0;
 }
-- 
cgit v0.10.2


From 6ee49a20c13b4b4e79a3bba406df8106cff284a1 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Thu, 2 Oct 2014 09:23:49 +1000
Subject: xfs: don't send null bp to xfs_trans_brelse()

In this case, if bp is NULL, error is set, and we send a
NULL bp to xfs_trans_brelse, which will try to dereference it.

Test whether we actually have a buffer before we try to
free it.

Coverity spotted this.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>

diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 2c42ae2..fd82753 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2563,7 +2563,8 @@ xfs_da_get_buf(
 				    mapp, nmap, 0);
 	error = bp ? bp->b_error : -EIO;
 	if (error) {
-		xfs_trans_brelse(trans, bp);
+		if (bp)
+			xfs_trans_brelse(trans, bp);
 		goto out_free;
 	}
 
-- 
cgit v0.10.2


From 04dd1a0d4b17a71220eae4fb313218f15a49bcdd Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Thu, 2 Oct 2014 09:24:11 +1000
Subject: xfs: fix crc field handling in xfs_sb_to/from_disk

I discovered this in userspace, but the same change applies
to the kernel.

If we xfs_mdrestore an image from a non-crc filesystem, lo
and behold the restored image has gained a CRC:

# db/xfs_metadump.sh -o /dev/sdc1 - | xfs_mdrestore - test.img
# xfs_db -c "sb 0" -c "p crc" /dev/sdc1
crc = 0 (correct)
# xfs_db -c "sb 0" -c "p crc" test.img
crc = 0xb6f8d6a0 (correct)

This is because xfs_sb_from_disk doesn't fill in sb_crc,
but xfs_sb_to_disk(XFS_SB_ALL_BITS) does write the in-memory
CRC to disk - so we get uninitialized memory on disk.

Fix this by always initializing sb_crc to 0 when we read
the superblock, and masking out the CRC bit from ALL_BITS
when we write it.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>

diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 8426e5e..5f902fa 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -445,6 +445,8 @@ __xfs_sb_from_disk(
 	to->sb_features_incompat = be32_to_cpu(from->sb_features_incompat);
 	to->sb_features_log_incompat =
 				be32_to_cpu(from->sb_features_log_incompat);
+	/* crc is only used on disk, not in memory; just init to 0 here. */
+	to->sb_crc = 0;
 	to->sb_pad = 0;
 	to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
 	to->sb_lsn = be64_to_cpu(from->sb_lsn);
@@ -550,6 +552,9 @@ xfs_sb_to_disk(
 	if (!fields)
 		return;
 
+	/* We should never write the crc here, it's updated in the IO path */
+	fields &= ~XFS_SB_CRC;
+
 	xfs_sb_quota_to_disk(to, from, &fields);
 	while (fields) {
 		f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
-- 
cgit v0.10.2


From 5cca3f611d159e5a4a5ec60413bd09948ef40aea Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Thu, 2 Oct 2014 09:27:09 +1000
Subject: xfs: check for null dquot in xfs_quota_calc_throttle()

Coverity spotted this.

Granted, we *just* checked xfs_inode_dquot() in the caller (by
calling xfs_quota_need_throttle). However, this is the only place we
don't check the return value but the check is cheap and future-proof
so add it.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index e9c47b6..afcf3c9 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -404,8 +404,8 @@ xfs_quota_calc_throttle(
 	int shift = 0;
 	struct xfs_dquot *dq = xfs_inode_dquot(ip, type);
 
-	/* over hi wmark, squash the prealloc completely */
-	if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) {
+	/* no dq, or over hi wmark, squash the prealloc completely */
+	if (!dq || dq->q_res_bcount >= dq->q_prealloc_hi_wmark) {
 		*qblocks = 0;
 		*qfreesp = 0;
 		return;
-- 
cgit v0.10.2


From 07d08681d26e99d8ba3bc4e56380f2cc04d3ff3b Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Thu, 2 Oct 2014 09:42:06 +1000
Subject: xfs: restore buffer_head unwritten bit on ioend cancel

xfs_vm_writepage() walks each buffer_head on the page, maps to the block
on disk and attaches to a running ioend structure that represents the
I/O submission. A new ioend is created when the type of I/O (unwritten,
delayed allocation or overwrite) required for a particular buffer_head
differs from the previous. If a buffer_head is a delalloc or unwritten
buffer, the associated bits are cleared by xfs_map_at_offset() once the
buffer_head is added to the ioend.

The process of mapping each buffer_head occurs in xfs_map_blocks() and
acquires the ilock in blocking or non-blocking mode, depending on the
type of writeback in progress. If the lock cannot be acquired for
non-blocking writeback, we cancel the ioend, redirty the page and
return. Writeback will revisit the page at some later point.

Note that we acquire the ilock for each buffer on the page. Therefore
during non-blocking writeback, it is possible to add an unwritten buffer
to the ioend, clear the unwritten state, fail to acquire the ilock when
mapping a subsequent buffer and cancel the ioend. If this occurs, the
unwritten status of the buffer sitting in the ioend has been lost. The
page will eventually hit writeback again, but xfs_vm_writepage() submits
overwrite I/O instead of unwritten I/O and does not perform unwritten
extent conversion at I/O completion. This leads to data corruption
because unwritten extents are treated as holes on reads and zeroes are
returned instead of reading from disk.

Modify xfs_cancel_ioend() to restore the buffer unwritten bit for ioends
of type XFS_IO_UNWRITTEN. This ensures that unwritten extent conversion
occurs once the page is eventually written back.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 11e9b4c..dc3e108 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -548,6 +548,13 @@ xfs_cancel_ioend(
 		do {
 			next_bh = bh->b_private;
 			clear_buffer_async_write(bh);
+			/*
+			 * The unwritten flag is cleared when added to the
+			 * ioend. We're not submitting for I/O so mark the
+			 * buffer unwritten again for next time around.
+			 */
+			if (ioend->io_type == XFS_IO_UNWRITTEN)
+				set_buffer_unwritten(bh);
 			unlock_buffer(bh);
 		} while ((bh = next_bh) != NULL);
 
-- 
cgit v0.10.2


From da5f10969d54006a24777a84ed3eaeeb2a21047f Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Thu, 2 Oct 2014 09:44:54 +1000
Subject: xfs: flush the range before zero range conversion

XFS currently discards delalloc blocks within the target range of a
zero range request. Unaligned start and end offsets are zeroed
through the page cache and the internal, aligned blocks are
converted to unwritten extents.

If EOF is page aligned and covered by a delayed allocation extent,
the inode size is not updated until I/O completion. If a zero range
request discards a delalloc range that covers page aligned EOF as
such, the inode size update never occurs. For example:

$ rm -f /mnt/file
$ xfs_io -fc "pwrite 0 64k" -c "zero 60k 4k" /mnt/file
$ stat -c "%s" /mnt/file
65536
$ umount /mnt
$ mount <dev> /mnt
$ stat -c "%s" /mnt/file
61440

Update xfs_zero_file_space() to flush the range rather than discard
delalloc blocks to ensure that inode size updates occur
appropriately.

[dchinner: Note that this is really a workaround to avoid the
underlying problems. More work is needed (and ongoing) to fix those
issues so this fix is being added as a temporary stop-gap measure. ]

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 1cb345e..6f5cb63 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1392,14 +1392,14 @@ xfs_zero_file_space(
 
 	if (start_boundary < end_boundary - 1) {
 		/*
-		 * punch out delayed allocation blocks and the page cache over
-		 * the conversion range
+		 * Writeback the range to ensure any inode size updates due to
+		 * appending writes make it to disk (otherwise we could just
+		 * punch out the delalloc blocks).
 		 */
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_bmap_punch_delalloc_range(ip,
-				XFS_B_TO_FSBT(mp, start_boundary),
-				XFS_B_TO_FSB(mp, end_boundary - start_boundary));
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+				start_boundary, end_boundary - 1);
+		if (error)
+			goto out;
 		truncate_pagecache_range(VFS_I(ip), start_boundary,
 					 end_boundary - 1);
 
-- 
cgit v0.10.2


From 52177937e9ac4573391143065b250403d3a6ae4b Mon Sep 17 00:00:00 2001
From: Mark Tinguely <tinguely@sgi.com>
Date: Fri, 3 Oct 2014 09:09:50 +1000
Subject: xfs: xfs_iflush_done checks the wrong log item callback

Commit 3013683 ("xfs: remove all the inodes on a buffer from the AIL
in bulk") made the xfs inode flush callback more efficient by
combining all the inode writes on the buffer and the deletions of
the inode log item from AIL.

The initial loop in this patch should be looping through all
the log items on the buffer to see which items have
xfs_iflush_done as their callback function. But currently,
only the log item passed to the function has its callback
compared to xfs_iflush_done. If the log item pointer passed to
the function does have the xfs_iflush_done callback function,
then all the log items on the buffer are removed from the
li_bio_list on the buffer b_fspriv and could be removed from
the AIL even though they may have not been written yet.

This problem is masked by the fact that currently all inodes on a
buffer will have the same callback function - either xfs_iflush_done
or xfs_istale_done - and hence the bug cannot manifest in any way.
Still, we need to remove the landmine so that if we add new
callbacks in future this doesn't cause us problems.

Signed-off-by: Mark Tinguely <tinguely@sgi.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>

diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index de5a7be..63de0b0 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -615,7 +615,7 @@ xfs_iflush_done(
 	blip = bp->b_fspriv;
 	prev = NULL;
 	while (blip != NULL) {
-		if (lip->li_cb != xfs_iflush_done) {
+		if (blip->li_cb != xfs_iflush_done) {
 			prev = blip;
 			blip = blip->li_bio_list;
 			continue;
-- 
cgit v0.10.2


From a8b1ee8bafc765ebf029d03c5479a69aebff9693 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Mon, 13 Oct 2014 10:21:53 +1100
Subject: xfs: fix agno increment in xfs_inumbers() loop

An earlier change (the commit reference is missing from this message)
caused a regression in xfs_inumbers, which in turn broke
xfsdump, causing incomplete dumps.

The loop in xfs_inumbers() needs to fill the user-supplied
buffers, and iterates via xfs_btree_increment, reading new
ags as needed.

But the first time through the loop, if xfs_btree_increment()
succeeds, we continue, which triggers the ++agno at the bottom
of the loop, and we skip too soon to the next ag - without
the proper setup under next_ag to read the next ag.

Fix this by removing the agno increment from the loop conditional,
and only increment agno if we have actually hit the code under
the next_ag: target.

Cc: stable@vger.kernel.org
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>

diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index f71be9c..f1deb96 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -639,7 +639,8 @@ next_ag:
 		xfs_buf_relse(agbp);
 		agbp = NULL;
 		agino = 0;
-	} while (++agno < mp->m_sb.sb_agcount);
+		agno++;
+	} while (agno < mp->m_sb.sb_agcount);
 
 	if (!error) {
 		if (bufidx) {
-- 
cgit v0.10.2