From 06777d308f8f9ddb67798d34bf193101a4bdf06c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 17 Dec 2009 04:52:13 -0500
Subject: dio: fix use-after-free

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 4012885..e82adc2 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1206,7 +1206,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	 * NOTE: filesystems with their own locking have to handle this
 	 * on their own.
 	 */
-	if (dio->flags & DIO_LOCKING) {
+	if (flags & DIO_LOCKING) {
 		if (unlikely((rw & WRITE) && retval < 0)) {
 			loff_t isize = i_size_read(inode);
 			if (end > isize)
-- 
cgit v0.10.2


From b9aff027b2c1d6019d237382c78fd396f9de2ea5 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Fri, 20 Nov 2009 14:28:35 -0800
Subject: fs: anon_inodes implement dname

Add a d_dname method for anon_inodes filesystem, the same way pipefs and
sockfs pseudo filesystems.  This allows us to remove the DCACHE_UNHASHED
hack from anon_inodes.c (see next patch).

[AV: inumber is useless here, dropped from anon_inodefs_dname()]

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Miklos Szeredi <mszeredi@suse.cz>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 94f5110..01c529a 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -45,6 +45,15 @@ static int anon_inodefs_delete_dentry(struct dentry *dentry)
 	return 1;
 }
 
+/*
+ * anon_inodefs_dname() is called from d_path().
+ */
+static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+	return dynamic_dname(dentry, buffer, buflen, "anon_inode:%s",
+				dentry->d_name.name);
+}
+
 static struct file_system_type anon_inode_fs_type = {
 	.name		= "anon_inodefs",
 	.get_sb		= anon_inodefs_get_sb,
@@ -52,6 +61,7 @@ static struct file_system_type anon_inode_fs_type = {
 };
 static const struct dentry_operations anon_inodefs_dentry_operations = {
 	.d_delete	= anon_inodefs_delete_dentry,
+	.d_dname	= anon_inodefs_dname,
 };
 
 /*
-- 
cgit v0.10.2


From a3a065e3f13da8a3470ed09c7f38aad256083726 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 18 Nov 2009 05:30:19 +0100
Subject: fs: no games with DCACHE_UNHASHED

Filesystems outside the regular namespace do not have to clear DCACHE_UNHASHED
in order to have a working /proc/$pid/fd/XXX. Nothing in proc prevents the
fd link from being used if its dentry is not in the hash.

Also, it does not get put into the dcache hash if DCACHE_UNHASHED is clear;
that depends on the filesystem calling d_add or d_rehash.

So delete the misleading comments and needless code.

Acked-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 01c529a..2c99459 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -35,16 +35,6 @@ static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags,
 			     mnt);
 }
 
-static int anon_inodefs_delete_dentry(struct dentry *dentry)
-{
-	/*
-	 * We faked vfs to believe the dentry was hashed when we created it.
-	 * Now we restore the flag so that dput() will work correctly.
-	 */
-	dentry->d_flags |= DCACHE_UNHASHED;
-	return 1;
-}
-
 /*
  * anon_inodefs_dname() is called from d_path().
  */
@@ -60,7 +50,6 @@ static struct file_system_type anon_inode_fs_type = {
 	.kill_sb	= kill_anon_super,
 };
 static const struct dentry_operations anon_inodefs_dentry_operations = {
-	.d_delete	= anon_inodefs_delete_dentry,
 	.d_dname	= anon_inodefs_dname,
 };
 
@@ -129,8 +118,6 @@ struct file *anon_inode_getfile(const char *name,
 	atomic_inc(&anon_inode_inode->i_count);
 
 	path.dentry->d_op = &anon_inodefs_dentry_operations;
-	/* Do not publish this dentry inside the global dentry hash table */
-	path.dentry->d_flags &= ~DCACHE_UNHASHED;
 	d_instantiate(path.dentry, anon_inode_inode);
 
 	error = -ENFILE;
diff --git a/fs/pipe.c b/fs/pipe.c
index 43d79da..37ba29f 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -906,17 +906,6 @@ void free_pipe_info(struct inode *inode)
 }
 
 static struct vfsmount *pipe_mnt __read_mostly;
-static int pipefs_delete_dentry(struct dentry *dentry)
-{
-	/*
-	 * At creation time, we pretended this dentry was hashed
-	 * (by clearing DCACHE_UNHASHED bit in d_flags)
-	 * At delete time, we restore the truth : not hashed.
-	 * (so that dput() can proceed correctly)
-	 */
-	dentry->d_flags |= DCACHE_UNHASHED;
-	return 0;
-}
 
 /*
  * pipefs_dname() is called from d_path().
@@ -928,7 +917,6 @@ static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
 }
 
 static const struct dentry_operations pipefs_dentry_operations = {
-	.d_delete	= pipefs_delete_dentry,
 	.d_dname	= pipefs_dname,
 };
 
@@ -989,12 +977,6 @@ struct file *create_write_pipe(int flags)
 	path.mnt = mntget(pipe_mnt);
 
 	path.dentry->d_op = &pipefs_dentry_operations;
-	/*
-	 * We dont want to publish this dentry into global dentry hash table.
-	 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
-	 * This permits a working /proc/$pid/fd/XXX on pipes
-	 */
-	path.dentry->d_flags &= ~DCACHE_UNHASHED;
 	d_instantiate(path.dentry, inode);
 
 	err = -ENFILE;
diff --git a/net/socket.c b/net/socket.c
index dbfdfa9..769c386 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -312,18 +312,6 @@ static struct file_system_type sock_fs_type = {
 	.kill_sb =	kill_anon_super,
 };
 
-static int sockfs_delete_dentry(struct dentry *dentry)
-{
-	/*
-	 * At creation time, we pretended this dentry was hashed
-	 * (by clearing DCACHE_UNHASHED bit in d_flags)
-	 * At delete time, we restore the truth : not hashed.
-	 * (so that dput() can proceed correctly)
-	 */
-	dentry->d_flags |= DCACHE_UNHASHED;
-	return 0;
-}
-
 /*
  * sockfs_dname() is called from d_path().
  */
@@ -334,7 +322,6 @@ static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
 }
 
 static const struct dentry_operations sockfs_dentry_operations = {
-	.d_delete = sockfs_delete_dentry,
 	.d_dname  = sockfs_dname,
 };
 
@@ -374,12 +361,6 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
 	path.mnt = mntget(sock_mnt);
 
 	path.dentry->d_op = &sockfs_dentry_operations;
-	/*
-	 * We dont want to push this dentry into global dentry hash table.
-	 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
-	 * This permits a working /proc/$pid/fd/XXX on sockets
-	 */
-	path.dentry->d_flags &= ~DCACHE_UNHASHED;
 	d_instantiate(path.dentry, SOCK_INODE(sock));
 	SOCK_INODE(sock)->i_fop = &socket_file_ops;
 
-- 
cgit v0.10.2


From cb59861f03a626196a23fdef5e20ddbb8cca6466 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 16 Nov 2009 12:05:20 -0800
Subject: vfs: remove extraneous NULL d_inode check from do_filp_open

We can't get to this point unless it's a valid pointer.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namei.c b/fs/namei.c
index d2783c8..dad4b80 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1764,7 +1764,7 @@ do_last:
 
 	path_to_nameidata(&path, &nd);
 	error = -EISDIR;
-	if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
+	if (S_ISDIR(path.dentry->d_inode->i_mode))
 		goto exit;
 ok:
 	/*
-- 
cgit v0.10.2


From 9afa2fb6c13501e5b3536d15344fce4e5442c469 Mon Sep 17 00:00:00 2001
From: Erez Zadok <ezk@cs.sunysb.edu>
Date: Wed, 2 Dec 2009 19:51:54 -0500
Subject: fsstack/ecryptfs: remove unused get_nlinks param to
 fsstack_copy_attr_all

This get_nlinks parameter was never used by the only mainline user,
ecryptfs; and it has never been used by unionfs or wrapfs either.

Acked-by: Dustin Kirkland <kirkland@canonical.com>
Acked-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 2dda5ad..8f006a0 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -62,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 		struct inode *lower_inode =
 			ecryptfs_inode_to_lower(dentry->d_inode);
 
-		fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL);
+		fsstack_copy_attr_all(dentry->d_inode, lower_inode);
 	}
 out:
 	return rc;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 056fed62..429ca0b 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -626,9 +626,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			lower_new_dir_dentry->d_inode, lower_new_dentry);
 	if (rc)
 		goto out_lock;
-	fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL);
+	fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
 	if (new_dir != old_dir)
-		fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL);
+		fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
 out_lock:
 	unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
 	dput(lower_new_dentry->d_parent);
@@ -967,7 +967,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
 	rc = notify_change(lower_dentry, ia);
 	mutex_unlock(&lower_dentry->d_inode->i_mutex);
 out:
-	fsstack_copy_attr_all(inode, lower_inode, NULL);
+	fsstack_copy_attr_all(inode, lower_inode);
 	return rc;
 }
 
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 101fe4c..567bc4b 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
 		init_special_inode(inode, lower_inode->i_mode,
 				   lower_inode->i_rdev);
 	dentry->d_op = &ecryptfs_dops;
-	fsstack_copy_attr_all(inode, lower_inode, NULL);
+	fsstack_copy_attr_all(inode, lower_inode);
 	/* This size will be overwritten for real files w/ headers and
 	 * other metadata */
 	fsstack_copy_inode_size(inode, lower_inode);
diff --git a/fs/stack.c b/fs/stack.c
index 67716f6..0e20e43 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -14,11 +14,8 @@ void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
 }
 EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
 
-/* copy all attributes; get_nlinks is optional way to override the i_nlink
- * copying
- */
-void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
-				int (*get_nlinks)(struct inode *))
+/* copy all attributes */
+void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
 {
 	dest->i_mode = src->i_mode;
 	dest->i_uid = src->i_uid;
@@ -29,14 +26,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
 	dest->i_ctime = src->i_ctime;
 	dest->i_blkbits = src->i_blkbits;
 	dest->i_flags = src->i_flags;
-
-	/*
-	 * Update the nlinks AFTER updating the above fields, because the
-	 * get_links callback may depend on them.
-	 */
-	if (!get_nlinks)
-		dest->i_nlink = src->i_nlink;
-	else
-		dest->i_nlink = (*get_nlinks)(dest);
+	dest->i_nlink = src->i_nlink;
 }
 EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
index bb516ce..aa60311 100644
--- a/include/linux/fs_stack.h
+++ b/include/linux/fs_stack.h
@@ -8,9 +8,7 @@
 #include <linux/fs.h>
 
 /* externs for fs/stack.c */
-extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
-				int (*get_nlinks)(struct inode *));
-
+extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src);
 extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src);
 
 /* inlines */
-- 
cgit v0.10.2


From 1b8ab8159ef8f818f870a1d2e3b6953d80eefd3f Mon Sep 17 00:00:00 2001
From: Erez Zadok <ezk@cs.sunysb.edu>
Date: Thu, 3 Dec 2009 21:56:09 -0500
Subject: VFS/fsstack: handle 32-bit smp + preempt + large files in
 fsstack_copy_inode_size

Copy the inode size and blocks from one inode to another correctly on 32-bit
systems with CONFIG_SMP, CONFIG_PREEMPT, or CONFIG_LBDAF.  Use proper inode
spinlocks only when i_size/i_blocks cannot fit in one 32-bit word.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/stack.c b/fs/stack.c
index 0e20e43..4a6f7f4 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -7,10 +7,58 @@
  * This function cannot be inlined since i_size_{read,write} is rather
  * heavy-weight on 32-bit systems
  */
-void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
+void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
 {
-	i_size_write(dst, i_size_read((struct inode *)src));
-	dst->i_blocks = src->i_blocks;
+	loff_t i_size;
+	blkcnt_t i_blocks;
+
+	/*
+	 * i_size_read() includes its own seqlocking and protection from
+	 * preemption (see include/linux/fs.h): we need nothing extra for
+	 * that here, and prefer to avoid nesting locks than attempt to keep
+	 * i_size and i_blocks in sync together.
+	 */
+	i_size = i_size_read(src);
+
+	/*
+	 * But if CONFIG_LBDAF (on 32-bit), we ought to make an effort to
+	 * keep the two halves of i_blocks in sync despite SMP or PREEMPT -
+	 * though stat's generic_fillattr() doesn't bother, and we won't be
+	 * applying quotas (where i_blocks does become important) at the
+	 * upper level.
+	 *
+	 * We don't actually know what locking is used at the lower level;
+	 * but if it's a filesystem that supports quotas, it will be using
+	 * i_lock as in inode_add_bytes().  tmpfs uses other locking, and
+	 * its 32-bit is (just) able to exceed 2TB i_size with the aid of
+	 * holes; but its i_blocks cannot carry into the upper long without
+	 * almost 2TB swap - let's ignore that case.
+	 */
+	if (sizeof(i_blocks) > sizeof(long))
+		spin_lock(&src->i_lock);
+	i_blocks = src->i_blocks;
+	if (sizeof(i_blocks) > sizeof(long))
+		spin_unlock(&src->i_lock);
+
+	/*
+	 * If CONFIG_SMP or CONFIG_PREEMPT on 32-bit, it's vital for
+	 * fsstack_copy_inode_size() to hold some lock around
+	 * i_size_write(), otherwise i_size_read() may spin forever (see
+	 * include/linux/fs.h).  We don't necessarily hold i_mutex when this
+	 * is called, so take i_lock for that case.
+	 *
+	 * And if CONFIG_LBADF (on 32-bit), continue our effort to keep the
+	 * two halves of i_blocks in sync despite SMP or PREEMPT: use i_lock
+	 * for that case too, and do both at once by combining the tests.
+	 *
+	 * There is none of this locking overhead in the 64-bit case.
+	 */
+	if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
+		spin_lock(&dst->i_lock);
+	i_size_write(dst, i_size);
+	dst->i_blocks = i_blocks;
+	if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
+		spin_unlock(&dst->i_lock);
 }
 EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
 
diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
index aa60311..da317c7 100644
--- a/include/linux/fs_stack.h
+++ b/include/linux/fs_stack.h
@@ -9,7 +9,7 @@
 
 /* externs for fs/stack.c */
 extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src);
-extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src);
+extern void fsstack_copy_inode_size(struct inode *dst, struct inode *src);
 
 /* inlines */
 static inline void fsstack_copy_attr_atime(struct inode *dest,
-- 
cgit v0.10.2


From 76b7e0058d09f8104387980a690001681c04cc0a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 17 Dec 2009 14:24:20 +0100
Subject: fix up O_SYNC comments

Proper Posix O_SYNC handling only made it into 2.6.33, not 2.6.32.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/arch/alpha/include/asm/fcntl.h b/arch/alpha/include/asm/fcntl.h
index 21b1117..70145cb 100644
--- a/arch/alpha/include/asm/fcntl.h
+++ b/arch/alpha/include/asm/fcntl.h
@@ -16,7 +16,7 @@
 #define O_NOATIME	04000000
 #define O_CLOEXEC	010000000 /* set close_on_exec */
 /*
- * Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using
+ * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using
  * the O_SYNC flag.  We continue to use the existing numerical value
  * for O_DSYNC semantics now, but using the correct symbolic name for it.
  * This new value is used to request true Posix O_SYNC semantics.  It is
diff --git a/arch/mips/include/asm/fcntl.h b/arch/mips/include/asm/fcntl.h
index 7c6681a..e482fe9 100644
--- a/arch/mips/include/asm/fcntl.h
+++ b/arch/mips/include/asm/fcntl.h
@@ -19,7 +19,7 @@
 #define FASYNC		0x1000	/* fcntl, for BSD compatibility */
 #define O_LARGEFILE	0x2000	/* allow large file opens */
 /*
- * Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using
+ * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using
  * the O_SYNC flag.  We continue to use the existing numerical value
  * for O_DSYNC semantics now, but using the correct symbolic name for it.
  * This new value is used to request true Posix O_SYNC semantics.  It is
diff --git a/arch/sparc/include/asm/fcntl.h b/arch/sparc/include/asm/fcntl.h
index 3b9cfb3..38f37b33 100644
--- a/arch/sparc/include/asm/fcntl.h
+++ b/arch/sparc/include/asm/fcntl.h
@@ -19,7 +19,7 @@
 #define O_NOATIME	0x200000
 #define O_CLOEXEC	0x400000
 /*
- * Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using
+ * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using
  * the O_SYNC flag.  We continue to use the existing numerical value
  * for O_DSYNC semantics now, but using the correct symbolic name for it.
  * This new value is used to request true Posix O_SYNC semantics.  It is
diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h
index 681ddf3..fcd268c 100644
--- a/include/asm-generic/fcntl.h
+++ b/include/asm-generic/fcntl.h
@@ -51,7 +51,7 @@
 #endif
 
 /*
- * Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using
+ * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using
  * the O_SYNC flag.  We continue to use the existing numerical value
  * for O_DSYNC semantics now, but using the correct symbolic name for it.
  * This new value is used to request true Posix O_SYNC semantics.  It is
-- 
cgit v0.10.2


From 7a0ad10c367ab57c899d340372f37880cbe6ab52 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 17 Dec 2009 14:24:40 +0100
Subject: fold do_sync_file_range into sys_sync_file_range

We recently go rid of all callers of do_sync_file_range as they're better
served with vfs_fsync or the filemap_write_and_wait.  Now that
do_sync_file_range is down to a single caller fold it into it so that people
don't start using it again accidentally.  While at it also switch it from
using __filemap_fdatawrite_range(..., WB_SYNC_ALL) to the more clear
filemap_fdatawrite_range().

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/sync.c b/fs/sync.c
index 36752a6..418727a 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -355,6 +355,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
 {
 	int ret;
 	struct file *file;
+	struct address_space *mapping;
 	loff_t endbyte;			/* inclusive */
 	int fput_needed;
 	umode_t i_mode;
@@ -405,7 +406,28 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
 			!S_ISLNK(i_mode))
 		goto out_put;
 
-	ret = do_sync_mapping_range(file->f_mapping, offset, endbyte, flags);
+	mapping = file->f_mapping;
+	if (!mapping) {
+		ret = -EINVAL;
+		goto out_put;
+	}
+
+	ret = 0;
+	if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
+		ret = filemap_fdatawait_range(mapping, offset, endbyte);
+		if (ret < 0)
+			goto out_put;
+	}
+
+	if (flags & SYNC_FILE_RANGE_WRITE) {
+		ret = filemap_fdatawrite_range(mapping, offset, endbyte);
+		if (ret < 0)
+			goto out_put;
+	}
+
+	if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
+		ret = filemap_fdatawait_range(mapping, offset, endbyte);
+
 out_put:
 	fput_light(file, fput_needed);
 out:
@@ -437,38 +459,3 @@ asmlinkage long SyS_sync_file_range2(long fd, long flags,
 }
 SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2);
 #endif
-
-/*
- * `endbyte' is inclusive
- */
-int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
-			  loff_t endbyte, unsigned int flags)
-{
-	int ret;
-
-	if (!mapping) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	ret = 0;
-	if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
-		ret = filemap_fdatawait_range(mapping, offset, endbyte);
-		if (ret < 0)
-			goto out;
-	}
-
-	if (flags & SYNC_FILE_RANGE_WRITE) {
-		ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
-						WB_SYNC_ALL);
-		if (ret < 0)
-			goto out;
-	}
-
-	if (flags & SYNC_FILE_RANGE_WAIT_AFTER) {
-		ret = filemap_fdatawait_range(mapping, offset, endbyte);
-	}
-out:
-	return ret;
-}
-EXPORT_SYMBOL_GPL(do_sync_mapping_range);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 66bc0a5..77a9750 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1095,10 +1095,6 @@ struct file_lock {
 
 extern void send_sigio(struct fown_struct *fown, int fd, int band);
 
-/* fs/sync.c */
-extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
-			loff_t endbyte, unsigned int flags);
-
 #ifdef CONFIG_FILE_LOCKING
 extern int fcntl_getlk(struct file *, struct flock __user *);
 extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
-- 
cgit v0.10.2


From eaff8079d4f1016a12e34ab323737314f24127dd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 17 Dec 2009 14:25:01 +0100
Subject: kill I_LOCK

After I_SYNC was split from I_LOCK the leftover is always used together with
I_NEW and thus superflous.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 3ff32fa..6e220f4 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -125,7 +125,7 @@ static struct inode *gfs2_iget_skip(struct super_block *sb,
  * directory entry when gfs2_inode_lookup() is invoked. Part of the code
  * segment inside gfs2_inode_lookup code needs to get moved around.
  *
- * Clean up I_LOCK and I_NEW as well.
+ * Clears I_NEW as well.
  **/
 
 void gfs2_set_iop(struct inode *inode)
diff --git a/fs/inode.c b/fs/inode.c
index 06c1f02..03dfeb2 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -113,7 +113,7 @@ static void wake_up_inode(struct inode *inode)
 	 * Prevent speculative execution through spin_unlock(&inode_lock);
 	 */
 	smp_mb();
-	wake_up_bit(&inode->i_state, __I_LOCK);
+	wake_up_bit(&inode->i_state, __I_NEW);
 }
 
 /**
@@ -690,17 +690,17 @@ void unlock_new_inode(struct inode *inode)
 	}
 #endif
 	/*
-	 * This is special!  We do not need the spinlock when clearing I_LOCK,
+	 * This is special!  We do not need the spinlock when clearing I_NEW,
 	 * because we're guaranteed that nobody else tries to do anything about
 	 * the state of the inode when it is locked, as we just created it (so
-	 * there can be no old holders that haven't tested I_LOCK).
+	 * there can be no old holders that haven't tested I_NEW).
 	 * However we must emit the memory barrier so that other CPUs reliably
-	 * see the clearing of I_LOCK after the other inode initialisation has
+	 * see the clearing of I_NEW after the other inode initialisation has
 	 * completed.
 	 */
 	smp_mb();
-	WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW));
-	inode->i_state &= ~(I_LOCK|I_NEW);
+	WARN_ON(!(inode->i_state & I_NEW));
+	inode->i_state &= ~I_NEW;
 	wake_up_inode(inode);
 }
 EXPORT_SYMBOL(unlock_new_inode);
@@ -731,7 +731,7 @@ static struct inode *get_new_inode(struct super_block *sb,
 				goto set_failed;
 
 			__inode_add_to_lists(sb, head, inode);
-			inode->i_state = I_LOCK|I_NEW;
+			inode->i_state = I_NEW;
 			spin_unlock(&inode_lock);
 
 			/* Return the locked inode with I_NEW set, the
@@ -778,7 +778,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
 		if (!old) {
 			inode->i_ino = ino;
 			__inode_add_to_lists(sb, head, inode);
-			inode->i_state = I_LOCK|I_NEW;
+			inode->i_state = I_NEW;
 			spin_unlock(&inode_lock);
 
 			/* Return the locked inode with I_NEW set, the
@@ -1083,7 +1083,7 @@ int insert_inode_locked(struct inode *inode)
 	ino_t ino = inode->i_ino;
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 
-	inode->i_state |= I_LOCK|I_NEW;
+	inode->i_state |= I_NEW;
 	while (1) {
 		struct hlist_node *node;
 		struct inode *old = NULL;
@@ -1120,7 +1120,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 	struct super_block *sb = inode->i_sb;
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
 
-	inode->i_state |= I_LOCK|I_NEW;
+	inode->i_state |= I_NEW;
 
 	while (1) {
 		struct hlist_node *node;
@@ -1510,7 +1510,7 @@ EXPORT_SYMBOL(inode_wait);
  * until the deletion _might_ have completed.  Callers are responsible
  * to recheck inode state.
  *
- * It doesn't matter if I_LOCK is not set initially, a call to
+ * It doesn't matter if I_NEW is not set initially, a call to
  * wake_up_inode() after removing from the hash list will DTRT.
  *
  * This is called with inode_lock held.
@@ -1518,8 +1518,8 @@ EXPORT_SYMBOL(inode_wait);
 static void __wait_on_freeing_inode(struct inode *inode)
 {
 	wait_queue_head_t *wq;
-	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK);
-	wq = bit_waitqueue(&inode->i_state, __I_LOCK);
+	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
+	wq = bit_waitqueue(&inode->i_state, __I_NEW);
 	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
 	spin_unlock(&inode_lock);
 	schedule();
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index f26e4d0..d945ea7 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1292,7 +1292,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
 		 */
 		/*
 		 * I believe this code is no longer needed.  Splitting I_LOCK
-		 * into two bits, I_LOCK and I_SYNC should prevent this
+		 * into two bits, I_NEW and I_SYNC should prevent this
 		 * deadlock as well.  But since I don't have a JFS testload
 		 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
 		 * Joern
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 9938034..dc2505a 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -530,7 +530,7 @@ err_corrupt_attr:
  * the ntfs inode.
  *
  * Q: What locks are held when the function is called?
- * A: i_state has I_LOCK set, hence the inode is locked, also
+ * A: i_state has I_NEW set, hence the inode is locked, also
  *    i_count is set to 1, so it is not going to go away
  *    i_flags is set to 0 and we have no business touching it.  Only an ioctl()
  *    is allowed to write to them. We should of course be honouring them but
@@ -1207,7 +1207,7 @@ err_out:
  * necessary fields in @vi as well as initializing the ntfs inode.
  *
  * Q: What locks are held when the function is called?
- * A: i_state has I_LOCK set, hence the inode is locked, also
+ * A: i_state has I_NEW set, hence the inode is locked, also
  *    i_count is set to 1, so it is not going to go away
  *
  * Return 0 on success and -errno on error.  In the error case, the inode will
@@ -1474,7 +1474,7 @@ err_out:
  * normal directory inodes.
  *
  * Q: What locks are held when the function is called?
- * A: i_state has I_LOCK set, hence the inode is locked, also
+ * A: i_state has I_NEW set, hence the inode is locked, also
  *    i_count is set to 1, so it is not going to go away
  *
  * Return 0 on success and -errno on error.  In the error case, the inode will
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 39849f8..16a6444 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -45,7 +45,7 @@
  *
  * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
  * read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
- * ondemand_readahead -> readpage"). In case of readahead, @I_LOCK flag is not
+ * ondemand_readahead -> readpage"). In case of readahead, @I_SYNC flag is not
  * set as well. However, UBIFS disables readahead.
  */
 
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 1d5b298..2259460 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -794,7 +794,7 @@ xfs_setup_inode(
 	struct inode		*inode = &ip->i_vnode;
 
 	inode->i_ino = ip->i_ino;
-	inode->i_state = I_NEW|I_LOCK;
+	inode->i_state = I_NEW;
 	inode_add_to_lists(ip->i_mount->m_super, inode);
 
 	inode->i_mode	= ip->i_d.di_mode;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0de36c2..fa402a6 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -91,7 +91,7 @@ xfs_inode_alloc(
 	ip->i_new_size = 0;
 
 	/* prevent anyone from using this yet */
-	VFS_I(ip)->i_state = I_NEW|I_LOCK;
+	VFS_I(ip)->i_state = I_NEW;
 
 	return ip;
 }
@@ -217,7 +217,7 @@ xfs_iget_cache_hit(
 			trace_xfs_iget_reclaim(ip);
 			goto out_error;
 		}
-		inode->i_state = I_LOCK|I_NEW;
+		inode->i_state = I_NEW;
 	} else {
 		/* If the VFS inode is being torn down, pause and try again. */
 		if (!igrab(inode)) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 77a9750..cca1919 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1587,7 +1587,7 @@ struct super_operations {
  * until that flag is cleared.  I_WILL_FREE, I_FREEING and I_CLEAR are set at
  * various stages of removing an inode.
  *
- * Two bits are used for locking and completion notification, I_LOCK and I_SYNC.
+ * Two bits are used for locking and completion notification, I_NEW and I_SYNC.
  *
  * I_DIRTY_SYNC		Inode is dirty, but doesn't have to be written on
  *			fdatasync().  i_atime is the usual cause.
@@ -1596,8 +1596,14 @@ struct super_operations {
  *			don't have to write inode on fdatasync() when only
  *			mtime has changed in it.
  * I_DIRTY_PAGES	Inode has dirty pages.  Inode itself may be clean.
- * I_NEW		get_new_inode() sets i_state to I_LOCK|I_NEW.  Both
- *			are cleared by unlock_new_inode(), called from iget().
+ * I_NEW		Serves as both a mutex and completion notification.
+ *			New inodes set I_NEW.  If two processes both create
+ *			the same inode, one of them will release its inode and
+ *			wait for I_NEW to be released before returning.
+ *			Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
+ *			also cause waiting on I_NEW, without I_NEW actually
+ *			being set.  find_inode() uses this to prevent returning
+ *			nearly-dead inodes.
  * I_WILL_FREE		Must be set when calling write_inode_now() if i_count
  *			is zero.  I_FREEING must be set when I_WILL_FREE is
  *			cleared.
@@ -1611,20 +1617,11 @@ struct super_operations {
  *			prohibited for many purposes.  iget() must wait for
  *			the inode to be completely released, then create it
  *			anew.  Other functions will just ignore such inodes,
- *			if appropriate.  I_LOCK is used for waiting.
+ *			if appropriate.  I_NEW is used for waiting.
  *
- * I_LOCK		Serves as both a mutex and completion notification.
- *			New inodes set I_LOCK.  If two processes both create
- *			the same inode, one of them will release its inode and
- *			wait for I_LOCK to be released before returning.
- *			Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
- *			also cause waiting on I_LOCK, without I_LOCK actually
- *			being set.  find_inode() uses this to prevent returning
- *			nearly-dead inodes.
- * I_SYNC		Similar to I_LOCK, but limited in scope to writeback
- *			of inode dirty data.  Having a separate lock for this
- *			purpose reduces latency and prevents some filesystem-
- *			specific deadlocks.
+ * I_SYNC		Synchonized write of dirty inode data.  The bits is
+ *			set during data writeback, and cleared with a wakeup
+ *			on the bit address once it is done.
  *
  * Q: What is the difference between I_WILL_FREE and I_FREEING?
  * Q: igrab() only checks on (I_FREEING|I_WILL_FREE).  Should it also check on
@@ -1633,13 +1630,12 @@ struct super_operations {
 #define I_DIRTY_SYNC		1
 #define I_DIRTY_DATASYNC	2
 #define I_DIRTY_PAGES		4
-#define I_NEW			8
+#define __I_NEW			3
+#define I_NEW			(1 << __I_NEW)
 #define I_WILL_FREE		16
 #define I_FREEING		32
 #define I_CLEAR			64
-#define __I_LOCK		7
-#define I_LOCK			(1 << __I_LOCK)
-#define __I_SYNC		8
+#define __I_SYNC		7
 #define I_SYNC			(1 << __I_SYNC)
 
 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 705f01f..c18c008 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -79,8 +79,7 @@ void wakeup_flusher_threads(long nr_pages);
 static inline void wait_on_inode(struct inode *inode)
 {
 	might_sleep();
-	wait_on_bit(&inode->i_state, __I_LOCK, inode_wait,
-							TASK_UNINTERRUPTIBLE);
+	wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE);
 }
 static inline void inode_sync_wait(struct inode *inode)
 {
-- 
cgit v0.10.2