From 6d13f69444bd3d4888e43f7756449748f5a98bad Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 29 Sep 2014 14:46:30 -0400
Subject: missing data dependency barrier in prepend_name()

AFAICS, prepend_name() is broken on SMP alpha.  Disclaimer: I don't have
SMP alpha boxen to reproduce it on.  However, it really looks like the race
is real.

CPU1: d_path() on /mnt/ramfs/<255-character>/foo
CPU2: mv /mnt/ramfs/<255-character> /mnt/ramfs/<63-character>

CPU2 does d_alloc(), which allocates an external name, stores the name there
including terminating NUL, does smp_wmb() and stores its address in
dentry->d_name.name.  It proceeds to d_add(dentry, NULL) and d_move()
old dentry over to that.  ->d_name.name value ends up in that dentry.

In the meanwhile, CPU1 gets to prepend_name() for that dentry.  It fetches
->d_name.name and ->d_name.len; the former ends up pointing to new name
(64-byte kmalloc'ed array), the latter - 255 (length of the old name).
Nothing to force the ordering there, and normally that would be OK, since we'd
run into the terminating NUL and stop.  Except that it's alpha, and we'd need
a data dependency barrier to guarantee that we see that store of NUL
__d_alloc() has done.  In a similar situation dentry_cmp() would survive; it
does explicit smp_read_barrier_depends() after fetching ->d_name.name.
prepend_name() doesn't and it risks walking past the end of kmalloc'ed object
and possibly oops due to taking a page fault in kernel mode.

Cc: stable@vger.kernel.org # 3.12+
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/dcache.c b/fs/dcache.c
index cb25a1a..e7484f9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2810,6 +2810,9 @@ static int prepend(char **buffer, int *buflen, const char *str, int namelen)
  * the beginning of the name. The sequence number check at the caller will
  * retry it again when a d_move() does happen. So any garbage in the buffer
  * due to mismatched pointer and length will be discarded.
+ *
+ * Data dependency barrier is needed to make sure that we see that terminating
+ * NUL.  Alpha strikes again, film at 11...
  */
 static int prepend_name(char **buffer, int *buflen, struct qstr *name)
 {
@@ -2817,6 +2820,8 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
 	u32 dlen = ACCESS_ONCE(name->len);
 	char *p;
 
+	smp_read_barrier_depends();
+
 	*buflen -= dlen + 1;
 	if (*buflen < 0)
 		return -ENAMETOOLONG;
-- 
cgit v0.10.2


From 8d85b4845a668d9a72649005c5aa932657311bd4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 29 Sep 2014 14:54:27 -0400
Subject: Allow sharing external names after __d_move()

* external dentry names get a small structure prepended to them
(struct external_name).
* it contains an atomic refcount, matching the number of struct dentry
instances that have ->d_name.name pointing to that external name.  The
first thing free_dentry() does is decrementing refcount of external name,
so the instances that are between the call of free_dentry() and
RCU-delayed actual freeing do not contribute.
* __d_move(x, y, false) makes the name of x equal to the name of y,
external or not.  If y has an external name, extra reference is grabbed
and put into x->d_name.name.  If x used to have an external name, the
reference to the old name is dropped and, should it reach zero, freeing
is scheduled via kfree_rcu().
* free_dentry() in dentry with external name decrements the refcount of
that name and, should it reach zero, does RCU-delayed call that will
free both the dentry and external name.  Otherwise it does what it
used to do, except that __d_free() doesn't even look at ->d_name.name;
it simply frees the dentry.

All non-RCU accesses to dentry external name are safe wrt freeing since they
all should happen before free_dentry() is called.  RCU accesses might run
into a dentry seen by free_dentry() or into an old name that got already
dropped by __d_move(); however, in both cases dentry must have been
alive and refer to that name at some point after we'd done rcu_read_lock(),
which means that any freeing must be still pending.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/dcache.c b/fs/dcache.c
index e7484f9..4858d2e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -235,18 +235,44 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c
 	return dentry_string_cmp(cs, ct, tcount);
 }
 
+struct external_name {
+	union {
+		atomic_t count;
+		struct rcu_head head;
+	} u;
+	unsigned char name[];
+};
+
+static inline struct external_name *external_name(struct dentry *dentry)
+{
+	return container_of(dentry->d_name.name, struct external_name, name[0]);
+}
+
 static void __d_free(struct rcu_head *head)
 {
 	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
 
 	WARN_ON(!hlist_unhashed(&dentry->d_alias));
-	if (dname_external(dentry))
-		kfree(dentry->d_name.name);
+	kmem_cache_free(dentry_cache, dentry); 
+}
+
+static void __d_free_external(struct rcu_head *head)
+{
+	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
+	WARN_ON(!hlist_unhashed(&dentry->d_alias));
+	kfree(external_name(dentry));
 	kmem_cache_free(dentry_cache, dentry); 
 }
 
 static void dentry_free(struct dentry *dentry)
 {
+	if (unlikely(dname_external(dentry))) {
+		struct external_name *p = external_name(dentry);
+		if (likely(atomic_dec_and_test(&p->u.count))) {
+			call_rcu(&dentry->d_u.d_rcu, __d_free_external);
+			return;
+		}
+	}
 	/* if dentry was never visible to RCU, immediate free is OK */
 	if (!(dentry->d_flags & DCACHE_RCUACCESS))
 		__d_free(&dentry->d_u.d_rcu);
@@ -1438,11 +1464,14 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 	 */
 	dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
 	if (name->len > DNAME_INLINE_LEN-1) {
-		dname = kmalloc(name->len + 1, GFP_KERNEL);
-		if (!dname) {
+		size_t size = offsetof(struct external_name, name[1]);
+		struct external_name *p = kmalloc(size + name->len, GFP_KERNEL);
+		if (!p) {
 			kmem_cache_free(dentry_cache, dentry); 
 			return NULL;
 		}
+		atomic_set(&p->u.count, 1);
+		dname = p->name;
 	} else  {
 		dname = dentry->d_iname;
 	}	
@@ -2372,11 +2401,10 @@ void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
 }
 EXPORT_SYMBOL(dentry_update_name_case);
 
-static void switch_names(struct dentry *dentry, struct dentry *target,
-			 bool exchange)
+static void swap_names(struct dentry *dentry, struct dentry *target)
 {
-	if (dname_external(target)) {
-		if (dname_external(dentry)) {
+	if (unlikely(dname_external(target))) {
+		if (unlikely(dname_external(dentry))) {
 			/*
 			 * Both external: swap the pointers
 			 */
@@ -2392,7 +2420,7 @@ static void switch_names(struct dentry *dentry, struct dentry *target,
 			target->d_name.name = target->d_iname;
 		}
 	} else {
-		if (dname_external(dentry)) {
+		if (unlikely(dname_external(dentry))) {
 			/*
 			 * dentry:external, target:internal.  Give dentry's
 			 * storage to target and make dentry internal
@@ -2407,12 +2435,6 @@ static void switch_names(struct dentry *dentry, struct dentry *target,
 			 */
 			unsigned int i;
 			BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
-			if (!exchange) {
-				memcpy(dentry->d_iname, target->d_name.name,
-						target->d_name.len + 1);
-				dentry->d_name.hash_len = target->d_name.hash_len;
-				return;
-			}
 			for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
 				swap(((long *) &dentry->d_iname)[i],
 				     ((long *) &target->d_iname)[i]);
@@ -2422,6 +2444,24 @@ static void switch_names(struct dentry *dentry, struct dentry *target,
 	swap(dentry->d_name.hash_len, target->d_name.hash_len);
 }
 
+static void copy_name(struct dentry *dentry, struct dentry *target)
+{
+	struct external_name *old_name = NULL;
+	if (unlikely(dname_external(dentry)))
+		old_name = external_name(dentry);
+	if (unlikely(dname_external(target))) {
+		atomic_inc(&external_name(target)->u.count);
+		dentry->d_name = target->d_name;
+	} else {
+		memcpy(dentry->d_iname, target->d_name.name,
+				target->d_name.len + 1);
+		dentry->d_name.name = dentry->d_iname;
+		dentry->d_name.hash_len = target->d_name.hash_len;
+	}
+	if (old_name && likely(atomic_dec_and_test(&old_name->u.count)))
+		kfree_rcu(old_name, u.head);
+}
+
 static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
 {
 	/*
@@ -2518,7 +2558,10 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
 	}
 
 	/* Switch the names.. */
-	switch_names(dentry, target, exchange);
+	if (exchange)
+		swap_names(dentry, target);
+	else
+		copy_name(dentry, target);
 
 	/* ... and switch them in the tree */
 	if (IS_ROOT(dentry)) {
-- 
cgit v0.10.2


From b3ca406f2755c20cea1cc1169672c56dd03c266c Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 10 Sep 2014 09:56:22 +0800
Subject: autofs - remove obsolete d_invalidate() from expire

Biederman's umount-on-rmdir series changes d_invalidate() to sumarily remove
mounts under the passed in dentry regardless of whether they are busy
or not. So calling this in fs/autofs4/expire.c:autofs4_tree_busy() is
definitely the wrong thing to do becuase it will silently umount entries
instead of just cleaning stale dentrys.

But this call shouldn't be needed and testing shows that automounting
continues to function without it.

As Al Viro correctly surmises the original intent of the call was to
perform what shrink_dcache_parent() does.

If at some time in the future I see stale dentries accumulating
following failed mounts I'll revisit the issue and possibly add a
shrink_dcache_parent() call if needed.

Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index a7be57e..8fa3895 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -255,12 +255,6 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 			struct autofs_info *ino = autofs4_dentry_ino(p);
 			unsigned int ino_count = atomic_read(&ino->count);
 
-			/*
-			 * Clean stale dentries below that have not been
-			 * invalidated after a mount fail during lookup
-			 */
-			d_invalidate(p);
-
 			/* allow for dget above and top is already dgot */
 			if (p == top)
 				ino_count += 2;
-- 
cgit v0.10.2


From 9ea459e110df32e60a762f311f7939eaa879601d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Aug 2014 13:08:20 -0400
Subject: delayed mntput

On final mntput() we want fs shutdown to happen before return to
userland; however, the only case where we want it happen right
there (i.e. where task_work_add won't do) is MNT_INTERNAL victim.
Those have to be fully synchronous - failure halfway through module
init might count on having vfsmount killed right there.  Fortunately,
final mntput on MNT_INTERNAL vfsmounts happens on shallow stack.
So we handle those synchronously and do an analog of delayed fput
logics for everything else.

As the result, we are guaranteed that fs shutdown will always happen
on shallow stack.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/mount.h b/fs/mount.h
index 6740a62..8f2a14a 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -29,7 +29,10 @@ struct mount {
 	struct mount *mnt_parent;
 	struct dentry *mnt_mountpoint;
 	struct vfsmount mnt;
-	struct rcu_head mnt_rcu;
+	union {
+		struct rcu_head mnt_rcu;
+		struct llist_node mnt_llist;
+	};
 #ifdef CONFIG_SMP
 	struct mnt_pcp __percpu *mnt_pcp;
 #else
diff --git a/fs/namespace.c b/fs/namespace.c
index ef42d9b..0441343 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -23,6 +23,7 @@
 #include <linux/proc_ns.h>
 #include <linux/magic.h>
 #include <linux/bootmem.h>
+#include <linux/task_work.h>
 #include "pnode.h"
 #include "internal.h"
 
@@ -957,6 +958,46 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 	return ERR_PTR(err);
 }
 
+static void cleanup_mnt(struct mount *mnt)
+{
+	/*
+	 * This probably indicates that somebody messed
+	 * up a mnt_want/drop_write() pair.  If this
+	 * happens, the filesystem was probably unable
+	 * to make r/w->r/o transitions.
+	 */
+	/*
+	 * The locking used to deal with mnt_count decrement provides barriers,
+	 * so mnt_get_writers() below is safe.
+	 */
+	WARN_ON(mnt_get_writers(mnt));
+	if (unlikely(mnt->mnt_pins.first))
+		mnt_pin_kill(mnt);
+	fsnotify_vfsmount_delete(&mnt->mnt);
+	dput(mnt->mnt.mnt_root);
+	deactivate_super(mnt->mnt.mnt_sb);
+	mnt_free_id(mnt);
+	call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
+}
+
+static void __cleanup_mnt(struct rcu_head *head)
+{
+	cleanup_mnt(container_of(head, struct mount, mnt_rcu));
+}
+
+static LLIST_HEAD(delayed_mntput_list);
+static void delayed_mntput(struct work_struct *unused)
+{
+	struct llist_node *node = llist_del_all(&delayed_mntput_list);
+	struct llist_node *next;
+
+	for (; node; node = next) {
+		next = llist_next(node);
+		cleanup_mnt(llist_entry(node, struct mount, mnt_llist));
+	}
+}
+static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
+
 static void mntput_no_expire(struct mount *mnt)
 {
 	rcu_read_lock();
@@ -982,24 +1023,18 @@ static void mntput_no_expire(struct mount *mnt)
 	list_del(&mnt->mnt_instance);
 	unlock_mount_hash();
 
-	/*
-	 * This probably indicates that somebody messed
-	 * up a mnt_want/drop_write() pair.  If this
-	 * happens, the filesystem was probably unable
-	 * to make r/w->r/o transitions.
-	 */
-	/*
-	 * The locking used to deal with mnt_count decrement provides barriers,
-	 * so mnt_get_writers() below is safe.
-	 */
-	WARN_ON(mnt_get_writers(mnt));
-	if (unlikely(mnt->mnt_pins.first))
-		mnt_pin_kill(mnt);
-	fsnotify_vfsmount_delete(&mnt->mnt);
-	dput(mnt->mnt.mnt_root);
-	deactivate_super(mnt->mnt.mnt_sb);
-	mnt_free_id(mnt);
-	call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
+	if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
+		struct task_struct *task = current;
+		if (likely(!(task->flags & PF_KTHREAD))) {
+			init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
+			if (!task_work_add(task, &mnt->mnt_rcu, true))
+				return;
+		}
+		if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
+			schedule_delayed_work(&delayed_mntput_work, 1);
+		return;
+	}
+	cleanup_mnt(mnt);
 }
 
 void mntput(struct vfsmount *mnt)
-- 
cgit v0.10.2


From 3ccb354d641d910309b916b9c856e2a82ced7237 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 12 Feb 2014 16:08:06 -0800
Subject: vfs: Document the effect of d_revalidate on d_find_alias

d_drop or check_submounts_and_drop called from d_revalidate can result
in renamed directories with child dentries being unhashed.  These
renamed and drop directory dentries can be rehashed after
d_materialise_unique uses d_find_alias to find them.

Reviewed-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/dcache.c b/fs/dcache.c
index 4858d2e..1f8e6ac 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -761,7 +761,8 @@ EXPORT_SYMBOL(dget_parent);
  * acquire the reference to alias and return it. Otherwise return NULL.
  * Notice that if inode is a directory there can be only one alias and
  * it can be unhashed only if it has no children, or if it is the root
- * of a filesystem.
+ * of a filesystem, or if the directory was renamed and d_revalidate
+ * was the first vfs operation to notice.
  *
  * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
  * any other hashed alias over that one.
-- 
cgit v0.10.2


From bafc9b754f752ea798c39f9b099a228fd56604e0 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 13 Feb 2014 07:54:28 -0800
Subject: vfs: More precise tests in d_invalidate

The current comments in d_invalidate about what and why it is doing
what it is doing are wildly off-base.  Which is not surprising as
the comments date back to last minute bug fix of the 2.2 kernel.

The big fat lie of a comment said: If it's a directory, we can't drop
it for fear of somebody re-populating it with children (even though
dropping it would make it unreachable from that root, we still might
repopulate it if it was a working directory or similar).

[AV] What we really need to avoid is multiple dentry aliases of the
same directory inode; on all filesystems that have ->d_revalidate()
we either declare all positive dentries always valid (and thus never
fed to d_invalidate()) or use d_materialise_unique() and/or d_splice_alias(),
which take care of alias prevention.

The current rules are:
- To prevent mount point leaks dentries that are mount points or that
  have childrent that are mount points may not be be unhashed.
- All dentries may be unhashed.
- Directories may be rehashed with d_materialise_unique

check_submounts_and_drop implements this already for well maintained
remote filesystems so implement the current rules in d_invalidate
by just calling check_submounts_and_drop.

The one difference between d_invalidate and check_submounts_and_drop
is that d_invalidate must respect it when a d_revalidate method has
earlier called d_drop so preserve the d_unhashed check in
d_invalidate.

Reviewed-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/dcache.c b/fs/dcache.c
index 1f8e6ac..8150e4e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -650,9 +650,8 @@ EXPORT_SYMBOL(dput);
  * @dentry: dentry to invalidate
  *
  * Try to invalidate the dentry if it turns out to be
- * possible. If there are other dentries that can be
- * reached through this one we can't delete it and we
- * return -EBUSY. On success we return 0.
+ * possible. If there are reasons not to delete it
+ * return -EBUSY. On success return 0.
  *
  * no dcache lock.
  */
@@ -667,38 +666,9 @@ int d_invalidate(struct dentry * dentry)
 		spin_unlock(&dentry->d_lock);
 		return 0;
 	}
-	/*
-	 * Check whether to do a partial shrink_dcache
-	 * to get rid of unused child entries.
-	 */
-	if (!list_empty(&dentry->d_subdirs)) {
-		spin_unlock(&dentry->d_lock);
-		shrink_dcache_parent(dentry);
-		spin_lock(&dentry->d_lock);
-	}
-
-	/*
-	 * Somebody else still using it?
-	 *
-	 * If it's a directory, we can't drop it
-	 * for fear of somebody re-populating it
-	 * with children (even though dropping it
-	 * would make it unreachable from the root,
-	 * we might still populate it if it was a
-	 * working directory or similar).
-	 * We also need to leave mountpoints alone,
-	 * directory or not.
-	 */
-	if (dentry->d_lockref.count > 1 && dentry->d_inode) {
-		if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) {
-			spin_unlock(&dentry->d_lock);
-			return -EBUSY;
-		}
-	}
-
-	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
-	return 0;
+
+	return check_submounts_and_drop(dentry);
 }
 EXPORT_SYMBOL(d_invalidate);
 
-- 
cgit v0.10.2


From 7af1364ffa64db61e386628594836e13d2ef04b5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 4 Oct 2013 19:15:13 -0700
Subject: vfs: Don't allow overwriting mounts in the current mount namespace

In preparation for allowing mountpoints to be renamed and unlinked
in remote filesystems and in other mount namespaces test if on a dentry
there is a mount in the local mount namespace before allowing it to
be renamed or unlinked.

The primary motivation here are old versions of fusermount unmount
which is not safe if the a path can be renamed or unlinked while it is
verifying the mount is safe to unmount.  More recent versions are simpler
and safer by simply using UMOUNT_NOFOLLOW when unmounting a mount
in a directory owned by an arbitrary user.

Miklos Szeredi <miklos@szeredi.hu> reports this is approach is good
enough to remove concerns about new kernels mixed with old versions
of fusermount.

A secondary motivation for restrictions here is that it removing empty
directories that have non-empty mount points on them appears to
violate the rule that rmdir can not remove empty directories.  As
Linus Torvalds pointed out this is useful for programs (like git) that
test if a directory is empty with rmdir.

Therefore this patch arranges to enforce the existing mount point
semantics for local mount namespace.

v2: Rewrote the test to be a drop in replacement for d_mountpoint
v3: Use bool instead of int as the return type of is_local_mountpoint

Reviewed-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/mount.h b/fs/mount.h
index 8f2a14a..8c6a2a6 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -115,3 +115,12 @@ struct proc_mounts {
 #define proc_mounts(p) (container_of((p), struct proc_mounts, m))
 
 extern const struct seq_operations mounts_op;
+
+extern bool __is_local_mountpoint(struct dentry *dentry);
+static inline bool is_local_mountpoint(struct dentry *dentry)
+{
+	if (!d_mountpoint(dentry))
+		return false;
+
+	return __is_local_mountpoint(dentry);
+}
diff --git a/fs/namei.c b/fs/namei.c
index a7b05bf..a3a14b0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3565,6 +3565,8 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 	mutex_lock(&dentry->d_inode->i_mutex);
 
 	error = -EBUSY;
+	if (is_local_mountpoint(dentry))
+		goto out;
 	if (d_mountpoint(dentry))
 		goto out;
 
@@ -3681,7 +3683,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
 		return -EPERM;
 
 	mutex_lock(&target->i_mutex);
-	if (d_mountpoint(dentry))
+	if (is_local_mountpoint(dentry) || d_mountpoint(dentry))
 		error = -EBUSY;
 	else {
 		error = security_inode_unlink(dir, dentry);
@@ -4126,6 +4128,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		mutex_lock(&target->i_mutex);
 
 	error = -EBUSY;
+	if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
+		goto out;
 	if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
 		goto out;
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 0441343..77ffdb8 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -667,6 +667,41 @@ struct vfsmount *lookup_mnt(struct path *path)
 	return m;
 }
 
+/*
+ * __is_local_mountpoint - Test to see if dentry is a mountpoint in the
+ *                         current mount namespace.
+ *
+ * The common case is dentries are not mountpoints at all and that
+ * test is handled inline.  For the slow case when we are actually
+ * dealing with a mountpoint of some kind, walk through all of the
+ * mounts in the current mount namespace and test to see if the dentry
+ * is a mountpoint.
+ *
+ * The mount_hashtable is not usable in the context because we
+ * need to identify all mounts that may be in the current mount
+ * namespace not just a mount that happens to have some specified
+ * parent mount.
+ */
+bool __is_local_mountpoint(struct dentry *dentry)
+{
+	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+	struct mount *mnt;
+	bool is_covered = false;
+
+	if (!d_mountpoint(dentry))
+		goto out;
+
+	down_read(&namespace_sem);
+	list_for_each_entry(mnt, &ns->list, mnt_list) {
+		is_covered = (mnt->mnt_mountpoint == dentry);
+		if (is_covered)
+			break;
+	}
+	up_read(&namespace_sem);
+out:
+	return is_covered;
+}
+
 static struct mountpoint *new_mountpoint(struct dentry *dentry)
 {
 	struct hlist_head *chain = mp_hash(dentry);
-- 
cgit v0.10.2


From 0a5eb7c8189922e86a840972cd0b57e41de6f031 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederman@twitter.com>
Date: Sun, 22 Sep 2013 19:37:01 -0700
Subject: vfs: Keep a list of mounts on a mount point

To spot any possible problems call BUG if a mountpoint
is put when it's list of mounts is not empty.

AV: use hlist instead of list_head

Reviewed-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Eric W. Biederman <ebiederman@twitter.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/mount.h b/fs/mount.h
index 8c6a2a6..68bb03e 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -21,6 +21,7 @@ struct mnt_pcp {
 struct mountpoint {
 	struct hlist_node m_hash;
 	struct dentry *m_dentry;
+	struct hlist_head m_list;
 	int m_count;
 };
 
@@ -51,6 +52,7 @@ struct mount {
 	struct mount *mnt_master;	/* slave is on master->mnt_slave_list */
 	struct mnt_namespace *mnt_ns;	/* containing namespace */
 	struct mountpoint *mnt_mp;	/* where is it mounted */
+	struct hlist_node mnt_mp_list;	/* list mounts with the same mountpoint */
 #ifdef CONFIG_FSNOTIFY
 	struct hlist_head mnt_fsnotify_marks;
 	__u32 mnt_fsnotify_mask;
diff --git a/fs/namespace.c b/fs/namespace.c
index 77ffdb8..99572dd 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -225,6 +225,7 @@ static struct mount *alloc_vfsmnt(const char *name)
 		INIT_LIST_HEAD(&mnt->mnt_share);
 		INIT_LIST_HEAD(&mnt->mnt_slave_list);
 		INIT_LIST_HEAD(&mnt->mnt_slave);
+		INIT_HLIST_NODE(&mnt->mnt_mp_list);
 #ifdef CONFIG_FSNOTIFY
 		INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
 #endif
@@ -731,6 +732,7 @@ static struct mountpoint *new_mountpoint(struct dentry *dentry)
 	mp->m_dentry = dentry;
 	mp->m_count = 1;
 	hlist_add_head(&mp->m_hash, chain);
+	INIT_HLIST_HEAD(&mp->m_list);
 	return mp;
 }
 
@@ -738,6 +740,7 @@ static void put_mountpoint(struct mountpoint *mp)
 {
 	if (!--mp->m_count) {
 		struct dentry *dentry = mp->m_dentry;
+		BUG_ON(!hlist_empty(&mp->m_list));
 		spin_lock(&dentry->d_lock);
 		dentry->d_flags &= ~DCACHE_MOUNTED;
 		spin_unlock(&dentry->d_lock);
@@ -784,6 +787,7 @@ static void detach_mnt(struct mount *mnt, struct path *old_path)
 	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
 	list_del_init(&mnt->mnt_child);
 	hlist_del_init_rcu(&mnt->mnt_hash);
+	hlist_del_init(&mnt->mnt_mp_list);
 	put_mountpoint(mnt->mnt_mp);
 	mnt->mnt_mp = NULL;
 }
@@ -800,6 +804,7 @@ void mnt_set_mountpoint(struct mount *mnt,
 	child_mnt->mnt_mountpoint = dget(mp->m_dentry);
 	child_mnt->mnt_parent = mnt;
 	child_mnt->mnt_mp = mp;
+	hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
 }
 
 /*
@@ -1342,6 +1347,7 @@ void umount_tree(struct mount *mnt, int how)
 		if (how < 2)
 			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
 		if (mnt_has_parent(p)) {
+			hlist_del_init(&p->mnt_mp_list);
 			put_mountpoint(p->mnt_mp);
 			mnt_add_count(p->mnt_parent, -1);
 			/* move the reference to mountpoint into ->mnt_ex_mountpoint */
-- 
cgit v0.10.2


From e2dfa935464272395b4f35f4cc74ffcc87418b84 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 24 Feb 2014 17:32:34 -0800
Subject: vfs: factor out lookup_mountpoint from new_mountpoint

I am shortly going to add a new user of struct mountpoint that
needs to look up existing entries but does not want to create
a struct mountpoint if one does not exist.  Therefore to keep
the code simple and easy to read split out lookup_mountpoint
from new_mountpoint.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namespace.c b/fs/namespace.c
index 99572dd..88fc3f4 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -703,11 +703,10 @@ out:
 	return is_covered;
 }
 
-static struct mountpoint *new_mountpoint(struct dentry *dentry)
+static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
 {
 	struct hlist_head *chain = mp_hash(dentry);
 	struct mountpoint *mp;
-	int ret;
 
 	hlist_for_each_entry(mp, chain, m_hash) {
 		if (mp->m_dentry == dentry) {
@@ -718,6 +717,14 @@ static struct mountpoint *new_mountpoint(struct dentry *dentry)
 			return mp;
 		}
 	}
+	return NULL;
+}
+
+static struct mountpoint *new_mountpoint(struct dentry *dentry)
+{
+	struct hlist_head *chain = mp_hash(dentry);
+	struct mountpoint *mp;
+	int ret;
 
 	mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
 	if (!mp)
@@ -1818,7 +1825,9 @@ retry:
 	namespace_lock();
 	mnt = lookup_mnt(path);
 	if (likely(!mnt)) {
-		struct mountpoint *mp = new_mountpoint(dentry);
+		struct mountpoint *mp = lookup_mountpoint(dentry);
+		if (!mp)
+			mp = new_mountpoint(dentry);
 		if (IS_ERR(mp)) {
 			namespace_unlock();
 			mutex_unlock(&dentry->d_inode->i_mutex);
-- 
cgit v0.10.2


From 80b5dce8c59b0de1ed6e403b8298e02dcb4db64b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederman@twitter.com>
Date: Thu, 3 Oct 2013 01:31:18 -0700
Subject: vfs: Add a function to lazily unmount all mounts from any dentry.

The new function detach_mounts comes in two pieces.  The first piece
is a static inline test of d_mounpoint that returns immediately
without taking any locks if d_mounpoint is not set.  In the common
case when mountpoints are absent this allows the vfs to continue
running with it's same cacheline foot print.

The second piece of detach_mounts __detach_mounts actually does the
work and it assumes that a mountpoint is present so it is slow and
takes namespace_sem for write, and then locks the mount hash (aka
mount_lock) after a struct mountpoint has been found.

With those two locks held each entry on the list of mounts on a
mountpoint is selected and lazily unmounted until all of the mount
have been lazily unmounted.

v7: Wrote a proper change description and removed the changelog
    documenting deleted wrong turns.

Signed-off-by: Eric W. Biederman <ebiederman@twitter.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/mount.h b/fs/mount.h
index 68bb03e..f82c628 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -87,6 +87,15 @@ extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);
 
 extern bool legitimize_mnt(struct vfsmount *, unsigned);
 
+extern void __detach_mounts(struct dentry *dentry);
+
+static inline void detach_mounts(struct dentry *dentry)
+{
+	if (!d_mountpoint(dentry))
+		return;
+	__detach_mounts(dentry);
+}
+
 static inline void get_mnt_ns(struct mnt_namespace *ns)
 {
 	atomic_inc(&ns->count);
diff --git a/fs/namespace.c b/fs/namespace.c
index 88fc3f4..00e5b1e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1468,6 +1468,37 @@ static int do_umount(struct mount *mnt, int flags)
 	return retval;
 }
 
+/*
+ * __detach_mounts - lazily unmount all mounts on the specified dentry
+ *
+ * During unlink, rmdir, and d_drop it is possible to loose the path
+ * to an existing mountpoint, and wind up leaking the mount.
+ * detach_mounts allows lazily unmounting those mounts instead of
+ * leaking them.
+ *
+ * The caller may hold dentry->d_inode->i_mutex.
+ */
+void __detach_mounts(struct dentry *dentry)
+{
+	struct mountpoint *mp;
+	struct mount *mnt;
+
+	namespace_lock();
+	mp = lookup_mountpoint(dentry);
+	if (!mp)
+		goto out_unlock;
+
+	lock_mount_hash();
+	while (!hlist_empty(&mp->m_list)) {
+		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
+		umount_tree(mnt, 2);
+	}
+	unlock_mount_hash();
+	put_mountpoint(mp);
+out_unlock:
+	namespace_unlock();
+}
+
 /* 
  * Is the caller allowed to modify his namespace?
  */
-- 
cgit v0.10.2


From 8ed936b5671bfb33d89bc60bdcc7cf0470ba52fe Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederman@twitter.com>
Date: Tue, 1 Oct 2013 18:33:48 -0700
Subject: vfs: Lazily remove mounts on unlinked files and directories.

With the introduction of mount namespaces and bind mounts it became
possible to access files and directories that on some paths are mount
points but are not mount points on other paths.  It is very confusing
when rm -rf somedir returns -EBUSY simply because somedir is mounted
somewhere else.  With the addition of user namespaces allowing
unprivileged mounts this condition has gone from annoying to allowing
a DOS attack on other users in the system.

The possibility for mischief is removed by updating the vfs to support
rename, unlink and rmdir on a dentry that is a mountpoint and by
lazily unmounting mountpoints on deleted dentries.

In particular this change allows rename, unlink and rmdir system calls
on a dentry without a mountpoint in the current mount namespace to
succeed, and it allows rename, unlink, and rmdir performed on a
distributed filesystem to update the vfs cache even if when there is a
mount in some namespace on the original dentry.

There are two common patterns of maintaining mounts: Mounts on trusted
paths with the parent directory of the mount point and all ancestory
directories up to / owned by root and modifiable only by root
(i.e. /media/xxx, /dev, /dev/pts, /proc, /sys, /sys/fs/cgroup/{cpu,
cpuacct, ...}, /usr, /usr/local).  Mounts on unprivileged directories
maintained by fusermount.

In the case of mounts in trusted directories owned by root and
modifiable only by root the current parent directory permissions are
sufficient to ensure a mount point on a trusted path is not removed
or renamed by anyone other than root, even if there is a context
where the there are no mount points to prevent this.

In the case of mounts in directories owned by less privileged users
races with users modifying the path of a mount point are already a
danger.  fusermount already uses a combination of chdir,
/proc/<pid>/fd/NNN, and UMOUNT_NOFOLLOW to prevent these races.  The
removable of global rename, unlink, and rmdir protection really adds
nothing new to consider only a widening of the attack window, and
fusermount is already safe against unprivileged users modifying the
directory simultaneously.

In principle for perfect userspace programs returning -EBUSY for
unlink, rmdir, and rename of dentires that have mounts in the local
namespace is actually unnecessary.  Unfortunately not all userspace
programs are perfect so retaining -EBUSY for unlink, rmdir and rename
of dentries that have mounts in the current mount namespace plays an
important role of maintaining consistency with historical behavior and
making imperfect userspace applications hard to exploit.

v2: Remove spurious old_dentry.
v3: Optimized shrink_submounts_and_drop
    Removed unsued afs label
v4: Simplified the changes to check_submounts_and_drop
    Do not rename check_submounts_and_drop shrink_submounts_and_drop
    Document what why we need atomicity in check_submounts_and_drop
    Rely on the parent inode mutex to make d_revalidate and d_invalidate
    an atomic unit.
v5: Refcount the mountpoint to detach in case of simultaneous
    renames.

Reviewed-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/dcache.c b/fs/dcache.c
index 8150e4e..484114a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1343,36 +1343,39 @@ void shrink_dcache_for_umount(struct super_block *sb)
 	}
 }
 
-static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry)
+struct detach_data {
+	struct select_data select;
+	struct dentry *mountpoint;
+};
+static enum d_walk_ret detach_and_collect(void *_data, struct dentry *dentry)
 {
-	struct select_data *data = _data;
+	struct detach_data *data = _data;
 
 	if (d_mountpoint(dentry)) {
-		data->found = -EBUSY;
+		__dget_dlock(dentry);
+		data->mountpoint = dentry;
 		return D_WALK_QUIT;
 	}
 
-	return select_collect(_data, dentry);
+	return select_collect(&data->select, dentry);
 }
 
 static void check_and_drop(void *_data)
 {
-	struct select_data *data = _data;
+	struct detach_data *data = _data;
 
-	if (d_mountpoint(data->start))
-		data->found = -EBUSY;
-	if (!data->found)
-		__d_drop(data->start);
+	if (!data->mountpoint && !data->select.found)
+		__d_drop(data->select.start);
 }
 
 /**
- * check_submounts_and_drop - prune dcache, check for submounts and drop
+ * check_submounts_and_drop - detach submounts, prune dcache, and drop
  *
- * All done as a single atomic operation relative to has_unlinked_ancestor().
- * Returns 0 if successfully unhashed @parent.  If there were submounts then
- * return -EBUSY.
+ * The final d_drop is done as an atomic operation relative to
+ * rename_lock ensuring there are no races with d_set_mounted.  This
+ * ensures there are no unhashed dentries on the path to a mountpoint.
  *
- * @dentry: dentry to prune and drop
+ * @dentry: dentry to detach, prune and drop
  */
 int check_submounts_and_drop(struct dentry *dentry)
 {
@@ -1385,19 +1388,24 @@ int check_submounts_and_drop(struct dentry *dentry)
 	}
 
 	for (;;) {
-		struct select_data data;
+		struct detach_data data;
 
-		INIT_LIST_HEAD(&data.dispose);
-		data.start = dentry;
-		data.found = 0;
+		data.mountpoint = NULL;
+		INIT_LIST_HEAD(&data.select.dispose);
+		data.select.start = dentry;
+		data.select.found = 0;
+
+		d_walk(dentry, &data, detach_and_collect, check_and_drop);
 
-		d_walk(dentry, &data, check_and_collect, check_and_drop);
-		ret = data.found;
+		if (data.select.found)
+			shrink_dentry_list(&data.select.dispose);
 
-		if (!list_empty(&data.dispose))
-			shrink_dentry_list(&data.dispose);
+		if (data.mountpoint) {
+			detach_mounts(data.mountpoint);
+			dput(data.mountpoint);
+		}
 
-		if (ret <= 0)
+		if (!data.mountpoint && !data.select.found)
 			break;
 
 		cond_resched();
@@ -2639,10 +2647,8 @@ static struct dentry *__d_unalias(struct inode *inode,
 		goto out_err;
 	m2 = &alias->d_parent->d_inode->i_mutex;
 out_unalias:
-	if (likely(!d_mountpoint(alias))) {
-		__d_move(alias, dentry, false);
-		ret = alias;
-	}
+	__d_move(alias, dentry, false);
+	ret = alias;
 out_err:
 	spin_unlock(&inode->i_lock);
 	if (m2)
diff --git a/fs/namei.c b/fs/namei.c
index a3a14b0..2ba1090 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3567,8 +3567,6 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 	error = -EBUSY;
 	if (is_local_mountpoint(dentry))
 		goto out;
-	if (d_mountpoint(dentry))
-		goto out;
 
 	error = security_inode_rmdir(dir, dentry);
 	if (error)
@@ -3581,6 +3579,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 
 	dentry->d_inode->i_flags |= S_DEAD;
 	dont_mount(dentry);
+	detach_mounts(dentry);
 
 out:
 	mutex_unlock(&dentry->d_inode->i_mutex);
@@ -3683,7 +3682,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
 		return -EPERM;
 
 	mutex_lock(&target->i_mutex);
-	if (is_local_mountpoint(dentry) || d_mountpoint(dentry))
+	if (is_local_mountpoint(dentry))
 		error = -EBUSY;
 	else {
 		error = security_inode_unlink(dir, dentry);
@@ -3692,8 +3691,10 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
 			if (error)
 				goto out;
 			error = dir->i_op->unlink(dir, dentry);
-			if (!error)
+			if (!error) {
 				dont_mount(dentry);
+				detach_mounts(dentry);
+			}
 		}
 	}
 out:
@@ -4130,8 +4131,6 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	error = -EBUSY;
 	if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
 		goto out;
-	if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
-		goto out;
 
 	if (max_links && new_dir != old_dir) {
 		error = -EMLINK;
@@ -4168,6 +4167,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		if (is_dir)
 			target->i_flags |= S_DEAD;
 		dont_mount(new_dentry);
+		detach_mounts(new_dentry);
 	}
 	if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) {
 		if (!(flags & RENAME_EXCHANGE))
-- 
cgit v0.10.2


From 9b053f3207e8887fed88162a339fdd4001abcdb2 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 13 Feb 2014 09:34:30 -0800
Subject: vfs: Remove unnecessary calls of check_submounts_and_drop

Now that check_submounts_and_drop can not fail and is called from
d_invalidate there is no longer a need to call check_submounts_and_drom
from filesystem d_revalidate methods so remove it.

Reviewed-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 5293003..a1645b8 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -669,7 +669,6 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 
 out_valid:
 	dentry->d_fsdata = dir_version;
-out_skip:
 	dput(parent);
 	key_put(key);
 	_leave(" = 1 [valid]");
@@ -682,10 +681,6 @@ not_found:
 	spin_unlock(&dentry->d_lock);
 
 out_bad:
-	/* don't unhash if we have submounts */
-	if (check_submounts_and_drop(dentry) != 0)
-		goto out_skip;
-
 	_debug("dropping dentry %s/%s",
 	       parent->d_name.name, dentry->d_name.name);
 	dput(parent);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index de1d84a..820efd7 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -274,9 +274,6 @@ out:
 
 invalid:
 	ret = 0;
-
-	if (!(flags & LOOKUP_RCU) && check_submounts_and_drop(entry) != 0)
-		ret = 1;
 	goto out;
 }
 
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index d3a5d4e..589f4ea 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -93,9 +93,6 @@ invalid_gunlock:
 	if (!had_lock)
 		gfs2_glock_dq_uninit(&d_gh);
 invalid:
-	if (check_submounts_and_drop(dentry) != 0)
-		goto valid;
-
 	dput(parent);
 	return 0;
 
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index a693f5b..1c77193 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -463,21 +463,10 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
 		goto out_bad;
 
 	mutex_unlock(&kernfs_mutex);
-out_valid:
 	return 1;
 out_bad:
 	mutex_unlock(&kernfs_mutex);
 out_bad_unlocked:
-	/*
-	 * @dentry doesn't match the underlying kernfs node, drop the
-	 * dentry and force lookup.  If we have submounts we must allow the
-	 * vfs caches to lie about the state of the filesystem to prevent
-	 * leaks and other nasty things, so use check_submounts_and_drop()
-	 * instead of d_drop().
-	 */
-	if (check_submounts_and_drop(dentry) != 0)
-		goto out_valid;
-
 	return 0;
 }
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 36d921f..8be6988 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1211,10 +1211,6 @@ out_zap_parent:
 		if (IS_ROOT(dentry))
 			goto out_valid;
 	}
-	/* If we have submounts, don't unhash ! */
-	if (check_submounts_and_drop(dentry) != 0)
-		goto out_valid;
-
 	dput(parent);
 	dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
 			__func__, dentry);
-- 
cgit v0.10.2


From 1ffe46d11cc88479797b262f60d92e5fb461b411 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 13 Feb 2014 09:39:37 -0800
Subject: vfs: Merge check_submounts_and_drop and d_invalidate

Now that d_invalidate is the only caller of check_submounts_and_drop,
expand check_submounts_and_drop inline in d_invalidate.

Reviewed-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/dcache.c b/fs/dcache.c
index 484114a..5e02b9e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -645,32 +645,6 @@ kill_it:
 }
 EXPORT_SYMBOL(dput);
 
-/**
- * d_invalidate - invalidate a dentry
- * @dentry: dentry to invalidate
- *
- * Try to invalidate the dentry if it turns out to be
- * possible. If there are reasons not to delete it
- * return -EBUSY. On success return 0.
- *
- * no dcache lock.
- */
- 
-int d_invalidate(struct dentry * dentry)
-{
-	/*
-	 * If it's already been dropped, return OK.
-	 */
-	spin_lock(&dentry->d_lock);
-	if (d_unhashed(dentry)) {
-		spin_unlock(&dentry->d_lock);
-		return 0;
-	}
-	spin_unlock(&dentry->d_lock);
-
-	return check_submounts_and_drop(dentry);
-}
-EXPORT_SYMBOL(d_invalidate);
 
 /* This must be called with d_lock held */
 static inline void __dget_dlock(struct dentry *dentry)
@@ -1190,7 +1164,7 @@ EXPORT_SYMBOL(have_submounts);
  * reachable (e.g. NFS can unhash a directory dentry and then the complete
  * subtree can become unreachable).
  *
- * Only one of check_submounts_and_drop() and d_set_mounted() must succeed.  For
+ * Only one of d_invalidate() and d_set_mounted() must succeed.  For
  * this reason take rename_lock and d_lock on dentry and ancestors.
  */
 int d_set_mounted(struct dentry *dentry)
@@ -1199,7 +1173,7 @@ int d_set_mounted(struct dentry *dentry)
 	int ret = -ENOENT;
 	write_seqlock(&rename_lock);
 	for (p = dentry->d_parent; !IS_ROOT(p); p = p->d_parent) {
-		/* Need exclusion wrt. check_submounts_and_drop() */
+		/* Need exclusion wrt. d_invalidate() */
 		spin_lock(&p->d_lock);
 		if (unlikely(d_unhashed(p))) {
 			spin_unlock(&p->d_lock);
@@ -1369,18 +1343,33 @@ static void check_and_drop(void *_data)
 }
 
 /**
- * check_submounts_and_drop - detach submounts, prune dcache, and drop
+ * d_invalidate - detach submounts, prune dcache, and drop
+ * @dentry: dentry to invalidate (aka detach, prune and drop)
+ *
+ * Try to invalidate the dentry if it turns out to be
+ * possible. If there are reasons not to delete it
+ * return -EBUSY. On success return 0.
+ *
+ * no dcache lock.
  *
  * The final d_drop is done as an atomic operation relative to
  * rename_lock ensuring there are no races with d_set_mounted.  This
  * ensures there are no unhashed dentries on the path to a mountpoint.
- *
- * @dentry: dentry to detach, prune and drop
  */
-int check_submounts_and_drop(struct dentry *dentry)
+int d_invalidate(struct dentry *dentry)
 {
 	int ret = 0;
 
+	/*
+	 * If it's already been dropped, return OK.
+	 */
+	spin_lock(&dentry->d_lock);
+	if (d_unhashed(dentry)) {
+		spin_unlock(&dentry->d_lock);
+		return 0;
+	}
+	spin_unlock(&dentry->d_lock);
+
 	/* Negative dentries can be dropped without further checks */
 	if (!dentry->d_inode) {
 		d_drop(dentry);
@@ -1414,7 +1403,7 @@ int check_submounts_and_drop(struct dentry *dentry)
 out:
 	return ret;
 }
-EXPORT_SYMBOL(check_submounts_and_drop);
+EXPORT_SYMBOL(d_invalidate);
 
 /**
  * __d_alloc	-	allocate a dcache entry
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 75a227c..595af29 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -269,7 +269,6 @@ extern void d_prune_aliases(struct inode *);
 
 /* test whether we have any submounts in a subdir tree */
 extern int have_submounts(struct dentry *);
-extern int check_submounts_and_drop(struct dentry *);
 
 /*
  * This adds the entry to the hash queues.
-- 
cgit v0.10.2


From 5542aa2fa7f6cddb03c4ac3135e390adffda98ca Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 13 Feb 2014 09:46:25 -0800
Subject: vfs: Make d_invalidate return void

Now that d_invalidate can no longer fail, stop returning a useless
return code.  For the few callers that checked the return code update
remove the handling of d_invalidate failure.

Reviewed-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8a8e298..996eb19 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2423,9 +2423,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 		goto out_dput;
 	}
 
-	err = d_invalidate(dentry);
-	if (err)
-		goto out_unlock;
+	d_invalidate(dentry);
 
 	down_write(&root->fs_info->subvol_sem);
 
@@ -2510,7 +2508,6 @@ out_release:
 	btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved);
 out_up_write:
 	up_write(&root->fs_info->subvol_sem);
-out_unlock:
 	if (err) {
 		spin_lock(&dest->root_item_lock);
 		root_flags = btrfs_root_flags(&dest->root_item);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index b334a89..d2141f1 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -87,8 +87,6 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
 		return;
 
 	if (dentry) {
-		int err;
-
 		inode = dentry->d_inode;
 		if (inode) {
 			/*
@@ -105,10 +103,8 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
 				goto out;
 			}
 		}
-		err = d_invalidate(dentry);
+		d_invalidate(dentry);
 		dput(dentry);
-		if (err)
-			return;
 	}
 
 	/*
diff --git a/fs/dcache.c b/fs/dcache.c
index 5e02b9e..70d102e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1346,34 +1346,28 @@ static void check_and_drop(void *_data)
  * d_invalidate - detach submounts, prune dcache, and drop
  * @dentry: dentry to invalidate (aka detach, prune and drop)
  *
- * Try to invalidate the dentry if it turns out to be
- * possible. If there are reasons not to delete it
- * return -EBUSY. On success return 0.
- *
  * no dcache lock.
  *
  * The final d_drop is done as an atomic operation relative to
  * rename_lock ensuring there are no races with d_set_mounted.  This
  * ensures there are no unhashed dentries on the path to a mountpoint.
  */
-int d_invalidate(struct dentry *dentry)
+void d_invalidate(struct dentry *dentry)
 {
-	int ret = 0;
-
 	/*
 	 * If it's already been dropped, return OK.
 	 */
 	spin_lock(&dentry->d_lock);
 	if (d_unhashed(dentry)) {
 		spin_unlock(&dentry->d_lock);
-		return 0;
+		return;
 	}
 	spin_unlock(&dentry->d_lock);
 
 	/* Negative dentries can be dropped without further checks */
 	if (!dentry->d_inode) {
 		d_drop(dentry);
-		goto out;
+		return;
 	}
 
 	for (;;) {
@@ -1399,9 +1393,6 @@ int d_invalidate(struct dentry *dentry)
 
 		cond_resched();
 	}
-
-out:
-	return ret;
 }
 EXPORT_SYMBOL(d_invalidate);
 
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 820efd7..dbab798 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1286,9 +1286,7 @@ static int fuse_direntplus_link(struct file *file,
 			d_drop(dentry);
 		} else if (get_node_id(inode) != o->nodeid ||
 			   ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
-			err = d_invalidate(dentry);
-			if (err)
-				goto out;
+			d_invalidate(dentry);
 		} else if (is_bad_inode(inode)) {
 			err = -EIO;
 			goto out;
diff --git a/fs/namei.c b/fs/namei.c
index 2ba1090..d20d579 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1306,7 +1306,8 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
 				if (error < 0) {
 					dput(dentry);
 					return ERR_PTR(error);
-				} else if (!d_invalidate(dentry)) {
+				} else {
+					d_invalidate(dentry);
 					dput(dentry);
 					dentry = NULL;
 				}
@@ -1435,10 +1436,9 @@ unlazy:
 			dput(dentry);
 			return status;
 		}
-		if (!d_invalidate(dentry)) {
-			dput(dentry);
-			goto need_lookup;
-		}
+		d_invalidate(dentry);
+		dput(dentry);
+		goto need_lookup;
 	}
 
 	path->mnt = mnt;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8be6988..06e8cfc 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -486,8 +486,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
 				nfs_setsecurity(dentry->d_inode, entry->fattr, entry->label);
 			goto out;
 		} else {
-			if (d_invalidate(dentry) != 0)
-				goto out;
+			d_invalidate(dentry);
 			dput(dentry);
 		}
 	}
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 595af29..81b0315 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -254,7 +254,7 @@ extern struct dentry * d_obtain_root(struct inode *);
 extern void shrink_dcache_sb(struct super_block *);
 extern void shrink_dcache_parent(struct dentry *);
 extern void shrink_dcache_for_umount(struct super_block *);
-extern int d_invalidate(struct dentry *);
+extern void d_invalidate(struct dentry *);
 
 /* only used at mount-time */
 extern struct dentry * d_make_root(struct inode *);
-- 
cgit v0.10.2


From c143c2333c48f1430231b31a8c17e074b9b504eb Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 13 Feb 2014 10:19:10 -0800
Subject: vfs: Remove d_drop calls from d_revalidate implementations

Now that d_invalidate always succeeds it is not longer necessary or
desirable to hard code d_drop calls into filesystem specific
d_revalidate implementations.

Remove the unnecessary d_drop calls and rely on d_invalidate
to drop the dentries.  Using d_invalidate ensures that paths
to mount points will not be dropped.

Reviewed-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index c29d6ae..b6c59ea 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1069,7 +1069,6 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
 		ceph_dentry_lru_touch(dentry);
 	} else {
 		ceph_dir_clear_complete(dir);
-		d_drop(dentry);
 	}
 	iput(dir);
 	return valid;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index baf852b..b4fe0ee 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1590,7 +1590,6 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
 		put_task_struct(task);
 		return 1;
 	}
-	d_drop(dentry);
 	return 0;
 }
 
@@ -1727,9 +1726,6 @@ out:
 	put_task_struct(task);
 
 out_notask:
-	if (status <= 0)
-		d_drop(dentry);
-
 	return status;
 }
 
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 955bb55..e11d7c5 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -129,8 +129,6 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
 		}
 		put_task_struct(task);
 	}
-
-	d_drop(dentry);
 	return 0;
 }
 
-- 
cgit v0.10.2


From bbd5192412fdedbae00888316bfe350bf89d0458 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 13 Feb 2014 10:24:23 -0800
Subject: proc: Update proc_flush_task_mnt to use d_invalidate

Now that d_invalidate always succeeds and flushes mount points use
it in stead of a combination of shrink_dcache_parent and d_drop
in proc_flush_task_mnt.  This removes the danger of a mount point
under /proc/<pid>/... becoming unreachable after the d_drop.

Reviewed-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/proc/base.c b/fs/proc/base.c
index b4fe0ee..00cd85f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2639,8 +2639,7 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
 	/* no ->d_hash() rejects on procfs */
 	dentry = d_hash_and_lookup(mnt->mnt_root, &name);
 	if (dentry) {
-		shrink_dcache_parent(dentry);
-		d_drop(dentry);
+		d_invalidate(dentry);
 		dput(dentry);
 	}
 
@@ -2660,8 +2659,7 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
 	name.len = snprintf(buf, sizeof(buf), "%d", pid);
 	dentry = d_hash_and_lookup(dir, &name);
 	if (dentry) {
-		shrink_dcache_parent(dentry);
-		d_drop(dentry);
+		d_invalidate(dentry);
 		dput(dentry);
 	}
 
-- 
cgit v0.10.2


From 29355c3904e1765948c7721719a028b7eb5dfe1d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 30 May 2014 11:25:30 -0400
Subject: d_prune_alias(): just lock the parent and call __dentry_kill()

The only reason for games with ->d_prune() was __d_drop(), which
was needed only to force dput() into killing the sucker off.

Note that lock_parent() can be called under ->i_lock and won't
drop it, so dentry is safe from somebody managing to kill it
under us - it won't happen while we are holding ->i_lock.

__dentry_kill() is called only with ->d_lockref.count being 0
(here and when picked from shrink list) or 1 (dput() and dropping
the ancestors in shrink_dentry_list()), so it will never be called
twice - the first thing it's doing is making ->d_lockref.count
negative and once that happens, nothing will increment it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/dcache.c b/fs/dcache.c
index 70d102e..d8a77b1 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -770,20 +770,13 @@ restart:
 	hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		spin_lock(&dentry->d_lock);
 		if (!dentry->d_lockref.count) {
-			/*
-			 * inform the fs via d_prune that this dentry
-			 * is about to be unhashed and destroyed.
-			 */
-			if ((dentry->d_flags & DCACHE_OP_PRUNE) &&
-			    !d_unhashed(dentry))
-				dentry->d_op->d_prune(dentry);
-
-			__dget_dlock(dentry);
-			__d_drop(dentry);
-			spin_unlock(&dentry->d_lock);
-			spin_unlock(&inode->i_lock);
-			dput(dentry);
-			goto restart;
+			struct dentry *parent = lock_parent(dentry);
+			if (likely(!dentry->d_lockref.count)) {
+				__dentry_kill(dentry);
+				goto restart;
+			}
+			if (parent)
+				spin_unlock(&parent->d_lock);
 		}
 		spin_unlock(&dentry->d_lock);
 	}
-- 
cgit v0.10.2


From 2926620145095ffb0350b2312ac9d0af8537796f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 30 May 2014 11:39:02 -0400
Subject: dcache.c: call ->d_prune() regardless of d_unhashed()

the only in-tree instance checks d_unhashed() anyway,
out-of-tree code can preserve the current behaviour by
adding such check if they want it and we get an ability
to use it in cases where we *want* to be notified of
killing being inevitable before ->d_lock is dropped,
whether it's unhashed or not.  In particular, autofs
would benefit from that.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/dcache.c b/fs/dcache.c
index d8a77b1..21eee4c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -482,7 +482,7 @@ static void __dentry_kill(struct dentry *dentry)
 	 * inform the fs via d_prune that this dentry is about to be
 	 * unhashed and destroyed.
 	 */
-	if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry))
+	if (dentry->d_flags & DCACHE_OP_PRUNE)
 		dentry->d_op->d_prune(dentry);
 
 	if (dentry->d_flags & DCACHE_LRU_LIST) {
-- 
cgit v0.10.2


From 19d860a140beac48a1377f179e693abe86a9dac9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 4 May 2014 20:11:36 -0400
Subject: handle suicide on late failure exits in execve() in
 search_binary_handler()

... rather than doing that in the guts of ->load_binary().
[updated to fix the bug spotted by Shentino - for SIGSEGV we really need
something stronger than send_sig_info(); again, better do that in one place]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index d21ff89..df91466 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -308,11 +308,8 @@ static int load_aout_binary(struct linux_binprm *bprm)
 		(current->mm->start_brk = N_BSSADDR(ex));
 
 	retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
-	if (retval < 0) {
-		/* Someone check-me: is this error path enough? */
-		send_sig(SIGKILL, current, 0);
+	if (retval < 0)
 		return retval;
-	}
 
 	install_exec_creds(bprm);
 
@@ -324,17 +321,13 @@ static int load_aout_binary(struct linux_binprm *bprm)
 
 		error = vm_brk(text_addr & PAGE_MASK, map_size);
 
-		if (error != (text_addr & PAGE_MASK)) {
-			send_sig(SIGKILL, current, 0);
+		if (error != (text_addr & PAGE_MASK))
 			return error;
-		}
 
 		error = read_code(bprm->file, text_addr, 32,
 				  ex.a_text + ex.a_data);
-		if ((signed long)error < 0) {
-			send_sig(SIGKILL, current, 0);
+		if ((signed long)error < 0)
 			return error;
-		}
 	} else {
 #ifdef WARN_OLD
 		static unsigned long error_time, error_time2;
@@ -368,20 +361,16 @@ static int load_aout_binary(struct linux_binprm *bprm)
 				MAP_EXECUTABLE | MAP_32BIT,
 				fd_offset);
 
-		if (error != N_TXTADDR(ex)) {
-			send_sig(SIGKILL, current, 0);
+		if (error != N_TXTADDR(ex))
 			return error;
-		}
 
 		error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
 				PROT_READ | PROT_WRITE | PROT_EXEC,
 				MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
 				MAP_EXECUTABLE | MAP_32BIT,
 				fd_offset + ex.a_text);
-		if (error != N_DATADDR(ex)) {
-			send_sig(SIGKILL, current, 0);
+		if (error != N_DATADDR(ex))
 			return error;
-		}
 	}
 beyond_if:
 	set_binfmt(&aout_format);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index ca0ba15..929dec0 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -256,11 +256,8 @@ static int load_aout_binary(struct linux_binprm * bprm)
 		(current->mm->start_brk = N_BSSADDR(ex));
 
 	retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
-	if (retval < 0) {
-		/* Someone check-me: is this error path enough? */
-		send_sig(SIGKILL, current, 0);
+	if (retval < 0)
 		return retval;
-	}
 
 	install_exec_creds(bprm);
 
@@ -278,17 +275,13 @@ static int load_aout_binary(struct linux_binprm * bprm)
 		map_size = ex.a_text+ex.a_data;
 #endif
 		error = vm_brk(text_addr & PAGE_MASK, map_size);
-		if (error != (text_addr & PAGE_MASK)) {
-			send_sig(SIGKILL, current, 0);
+		if (error != (text_addr & PAGE_MASK))
 			return error;
-		}
 
 		error = read_code(bprm->file, text_addr, pos,
 				  ex.a_text+ex.a_data);
-		if ((signed long)error < 0) {
-			send_sig(SIGKILL, current, 0);
+		if ((signed long)error < 0)
 			return error;
-		}
 	} else {
 		if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
 		    (N_MAGIC(ex) != NMAGIC) && printk_ratelimit())
@@ -315,28 +308,22 @@ static int load_aout_binary(struct linux_binprm * bprm)
 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
 			fd_offset);
 
-		if (error != N_TXTADDR(ex)) {
-			send_sig(SIGKILL, current, 0);
+		if (error != N_TXTADDR(ex))
 			return error;
-		}
 
 		error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
 				PROT_READ | PROT_WRITE | PROT_EXEC,
 				MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
 				fd_offset + ex.a_text);
-		if (error != N_DATADDR(ex)) {
-			send_sig(SIGKILL, current, 0);
+		if (error != N_DATADDR(ex))
 			return error;
-		}
 	}
 beyond_if:
 	set_binfmt(&aout_format);
 
 	retval = set_brk(current->mm->start_brk, current->mm->brk);
-	if (retval < 0) {
-		send_sig(SIGKILL, current, 0);
+	if (retval < 0)
 		return retval;
-	}
 
 	current->mm->start_stack =
 		(unsigned long) create_aout_tables((char __user *) bprm->p, bprm);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 3892c1a..d8fc060 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -738,10 +738,8 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	   change some of these later */
 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
 				 executable_stack);
-	if (retval < 0) {
-		send_sig(SIGKILL, current, 0);
+	if (retval < 0)
 		goto out_free_dentry;
-	}
 	
 	current->mm->start_stack = bprm->p;
 
@@ -763,10 +761,8 @@ static int load_elf_binary(struct linux_binprm *bprm)
 			   and clear the area.  */
 			retval = set_brk(elf_bss + load_bias,
 					 elf_brk + load_bias);
-			if (retval) {
-				send_sig(SIGKILL, current, 0);
+			if (retval)
 				goto out_free_dentry;
-			}
 			nbyte = ELF_PAGEOFFSET(elf_bss);
 			if (nbyte) {
 				nbyte = ELF_MIN_ALIGN - nbyte;
@@ -820,7 +816,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
 				elf_prot, elf_flags, 0);
 		if (BAD_ADDR(error)) {
-			send_sig(SIGKILL, current, 0);
 			retval = IS_ERR((void *)error) ?
 				PTR_ERR((void*)error) : -EINVAL;
 			goto out_free_dentry;
@@ -851,7 +846,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
 		    elf_ppnt->p_memsz > TASK_SIZE ||
 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
 			/* set_brk can never work. Avoid overflows. */
-			send_sig(SIGKILL, current, 0);
 			retval = -EINVAL;
 			goto out_free_dentry;
 		}
@@ -883,12 +877,9 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	 * up getting placed where the bss needs to go.
 	 */
 	retval = set_brk(elf_bss, elf_brk);
-	if (retval) {
-		send_sig(SIGKILL, current, 0);
+	if (retval)
 		goto out_free_dentry;
-	}
 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
-		send_sig(SIGSEGV, current, 0);
 		retval = -EFAULT; /* Nobody gets to see this, but.. */
 		goto out_free_dentry;
 	}
@@ -909,7 +900,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
 			elf_entry += loc->interp_elf_ex.e_entry;
 		}
 		if (BAD_ADDR(elf_entry)) {
-			force_sig(SIGSEGV, current);
 			retval = IS_ERR((void *)elf_entry) ?
 					(int)elf_entry : -EINVAL;
 			goto out_free_dentry;
@@ -922,7 +912,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	} else {
 		elf_entry = loc->elf_ex.e_entry;
 		if (BAD_ADDR(elf_entry)) {
-			force_sig(SIGSEGV, current);
 			retval = -EINVAL;
 			goto out_free_dentry;
 		}
@@ -934,19 +923,15 @@ static int load_elf_binary(struct linux_binprm *bprm)
 
 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
 	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
-	if (retval < 0) {
-		send_sig(SIGKILL, current, 0);
+	if (retval < 0)
 		goto out;
-	}
 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
 
 	install_exec_creds(bprm);
 	retval = create_elf_tables(bprm, &loc->elf_ex,
 			  load_addr, interp_load_addr);
-	if (retval < 0) {
-		send_sig(SIGKILL, current, 0);
+	if (retval < 0)
 		goto out;
-	}
 	/* N.B. passed_fileno might not be initialized? */
 	current->mm->end_code = end_code;
 	current->mm->start_code = start_code;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index fe2a643..d3634bf 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -317,8 +317,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
 		goto error;
 
 	/* there's now no turning back... the old userspace image is dead,
-	 * defunct, deceased, etc. after this point we have to exit via
-	 * error_kill */
+	 * defunct, deceased, etc.
+	 */
 	set_personality(PER_LINUX_FDPIC);
 	if (elf_read_implies_exec(&exec_params.hdr, executable_stack))
 		current->personality |= READ_IMPLIES_EXEC;
@@ -343,24 +343,22 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
 
 	retval = setup_arg_pages(bprm, current->mm->start_stack,
 				 executable_stack);
-	if (retval < 0) {
-		send_sig(SIGKILL, current, 0);
-		goto error_kill;
-	}
+	if (retval < 0)
+		goto error;
 #endif
 
 	/* load the executable and interpreter into memory */
 	retval = elf_fdpic_map_file(&exec_params, bprm->file, current->mm,
 				    "executable");
 	if (retval < 0)
-		goto error_kill;
+		goto error;
 
 	if (interpreter_name) {
 		retval = elf_fdpic_map_file(&interp_params, interpreter,
 					    current->mm, "interpreter");
 		if (retval < 0) {
 			printk(KERN_ERR "Unable to load interpreter\n");
-			goto error_kill;
+			goto error;
 		}
 
 		allow_write_access(interpreter);
@@ -397,7 +395,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
 	if (IS_ERR_VALUE(current->mm->start_brk)) {
 		retval = current->mm->start_brk;
 		current->mm->start_brk = 0;
-		goto error_kill;
+		goto error;
 	}
 
 	current->mm->brk = current->mm->start_brk;
@@ -410,7 +408,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
 	install_exec_creds(bprm);
 	if (create_elf_fdpic_tables(bprm, current->mm,
 				    &exec_params, &interp_params) < 0)
-		goto error_kill;
+		goto error;
 
 	kdebug("- start_code  %lx", current->mm->start_code);
 	kdebug("- end_code    %lx", current->mm->end_code);
@@ -449,12 +447,6 @@ error:
 	kfree(interp_params.phdrs);
 	kfree(interp_params.loadmap);
 	return retval;
-
-	/* unrecoverable error - kill the process */
-error_kill:
-	send_sig(SIGSEGV, current, 0);
-	goto error;
-
 }
 
 /*****************************************************************************/
diff --git a/fs/exec.c b/fs/exec.c
index a2b42a9..7302b75 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1372,18 +1372,23 @@ int search_binary_handler(struct linux_binprm *bprm)
 		read_unlock(&binfmt_lock);
 		bprm->recursion_depth++;
 		retval = fmt->load_binary(bprm);
+		read_lock(&binfmt_lock);
+		put_binfmt(fmt);
 		bprm->recursion_depth--;
-		if (retval >= 0 || retval != -ENOEXEC ||
-		    bprm->mm == NULL || bprm->file == NULL) {
-			put_binfmt(fmt);
+		if (retval < 0 && !bprm->mm) {
+			/* we got to flush_old_exec() and failed after it */
+			read_unlock(&binfmt_lock);
+			force_sigsegv(SIGSEGV, current);
+			return retval;
+		}
+		if (retval != -ENOEXEC || !bprm->file) {
+			read_unlock(&binfmt_lock);
 			return retval;
 		}
-		read_lock(&binfmt_lock);
-		put_binfmt(fmt);
 	}
 	read_unlock(&binfmt_lock);
 
-	if (need_retry && retval == -ENOEXEC) {
+	if (need_retry) {
 		if (printable(bprm->buf[0]) && printable(bprm->buf[1]) &&
 		    printable(bprm->buf[2]) && printable(bprm->buf[3]))
 			return retval;
-- 
cgit v0.10.2


From 1fa97e8b1f327059aa98089abd8c3378cdf43017 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 7 May 2014 20:47:49 -0400
Subject: constify file_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 94187721..75bdd51 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1874,7 +1874,7 @@ extern int current_umask(void);
 extern void ihold(struct inode * inode);
 extern void iput(struct inode *);
 
-static inline struct inode *file_inode(struct file *f)
+static inline struct inode *file_inode(const struct file *f)
 {
 	return f->f_inode;
 }
-- 
cgit v0.10.2


From 4e07ad6406d8137cc5aa1317568408c461ee8ce8 Mon Sep 17 00:00:00 2001
From: Kirill Smelkov <kirr@nexedi.com>
Date: Thu, 14 Aug 2014 15:25:10 +0400
Subject: vfs: fix typo in s_op->alloc_inode() documentation

The function which calls s_op->alloc_inode() is not inode_alloc(), but
instead alloc_inode() which lives in fs/inode.c .

The typo was there from the beginning from 5ea626aa (VFS: update
documentation, 2005) - there was no standalone inode_alloc() for the
whole kernel history.

Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Kirill Smelkov <kirr@nexedi.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 61d65cc..02a766c 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -237,7 +237,7 @@ noted. This means that most methods can block safely. All methods are
 only called from a process context (i.e. not from an interrupt handler
 or bottom half).
 
-  alloc_inode: this method is called by inode_alloc() to allocate memory
+  alloc_inode: this method is called by alloc_inode() to allocate memory
  	for struct inode and initialize it.  If this function is not
  	defined, a simple 'struct inode' is allocated.  Normally
  	alloc_inode will be used to allocate a larger structure which
-- 
cgit v0.10.2


From 99358a1ca53e8e6ce09423500191396f0e6584d2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Fri, 1 Aug 2014 20:13:40 +0100
Subject: [jffs2] kill wbuf_queued/wbuf_dwork_lock

schedule_delayed_work() happening when the work is already pending is
a cheap no-op.  Don't bother with ->wbuf_queued logics - it's both
broken (cancelling ->wbuf_dwork leaves it set, as spotted by Jeff Harris)
and pointless.  It's cheaper to let schedule_delayed_work() handle that
case.

Reported-by: Jeff Harris <jefftharris@gmail.com>
Tested-by: Jeff Harris <jefftharris@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 413ef89..046fee8 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -134,8 +134,6 @@ struct jffs2_sb_info {
 	struct rw_semaphore wbuf_sem;	/* Protects the write buffer */
 
 	struct delayed_work wbuf_dwork; /* write-buffer write-out work */
-	int wbuf_queued;                /* non-zero delayed work is queued */
-	spinlock_t wbuf_dwork_lock;     /* protects wbuf_dwork and and wbuf_queued */
 
 	unsigned char *oobbuf;
 	int oobavail; /* How many bytes are available for JFFS2 in OOB */
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index a6597d6..09ed551 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1162,10 +1162,6 @@ static void delayed_wbuf_sync(struct work_struct *work)
 	struct jffs2_sb_info *c = work_to_sb(work);
 	struct super_block *sb = OFNI_BS_2SFFJ(c);
 
-	spin_lock(&c->wbuf_dwork_lock);
-	c->wbuf_queued = 0;
-	spin_unlock(&c->wbuf_dwork_lock);
-
 	if (!(sb->s_flags & MS_RDONLY)) {
 		jffs2_dbg(1, "%s()\n", __func__);
 		jffs2_flush_wbuf_gc(c, 0);
@@ -1180,14 +1176,9 @@ void jffs2_dirty_trigger(struct jffs2_sb_info *c)
 	if (sb->s_flags & MS_RDONLY)
 		return;
 
-	spin_lock(&c->wbuf_dwork_lock);
-	if (!c->wbuf_queued) {
+	delay = msecs_to_jiffies(dirty_writeback_interval * 10);
+	if (queue_delayed_work(system_long_wq, &c->wbuf_dwork, delay))
 		jffs2_dbg(1, "%s()\n", __func__);
-		delay = msecs_to_jiffies(dirty_writeback_interval * 10);
-		queue_delayed_work(system_long_wq, &c->wbuf_dwork, delay);
-		c->wbuf_queued = 1;
-	}
-	spin_unlock(&c->wbuf_dwork_lock);
 }
 
 int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
@@ -1211,7 +1202,6 @@ int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
 
 	/* Initialise write buffer */
 	init_rwsem(&c->wbuf_sem);
-	spin_lock_init(&c->wbuf_dwork_lock);
 	INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync);
 	c->wbuf_pagesize = c->mtd->writesize;
 	c->wbuf_ofs = 0xFFFFFFFF;
@@ -1251,7 +1241,6 @@ int jffs2_dataflash_setup(struct jffs2_sb_info *c) {
 
 	/* Initialize write buffer */
 	init_rwsem(&c->wbuf_sem);
-	spin_lock_init(&c->wbuf_dwork_lock);
 	INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync);
 	c->wbuf_pagesize =  c->mtd->erasesize;
 
@@ -1311,7 +1300,6 @@ int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) {
 
 	/* Initialize write buffer */
 	init_rwsem(&c->wbuf_sem);
-	spin_lock_init(&c->wbuf_dwork_lock);
 	INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync);
 
 	c->wbuf_pagesize = c->mtd->writesize;
@@ -1346,7 +1334,6 @@ int jffs2_ubivol_setup(struct jffs2_sb_info *c) {
 		return 0;
 
 	init_rwsem(&c->wbuf_sem);
-	spin_lock_init(&c->wbuf_dwork_lock);
 	INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync);
 
 	c->wbuf_pagesize =  c->mtd->writesize;
-- 
cgit v0.10.2


From b8314f9303a985354f445763960c0db2d7948891 Mon Sep 17 00:00:00 2001
From: Daeseok Youn <daeseok.youn@gmail.com>
Date: Mon, 11 Aug 2014 11:46:53 +0900
Subject: dcache: Fix no spaces at the start of a line in dcache.c

Fixed coding style in dcache.c

Signed-off-by: Daeseok Youn <daeseok.youn@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/dcache.c b/fs/dcache.c
index 21eee4c..8221faae 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2093,10 +2093,10 @@ struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name)
 	struct dentry *dentry;
 	unsigned seq;
 
-        do {
-                seq = read_seqbegin(&rename_lock);
-                dentry = __d_lookup(parent, name);
-                if (dentry)
+	do {
+		seq = read_seqbegin(&rename_lock);
+		dentry = __d_lookup(parent, name);
+		if (dentry)
 			break;
 	} while (read_seqretry(&rename_lock, seq));
 	return dentry;
-- 
cgit v0.10.2


From 475d0db742e3755c6b267f48577ff7cbb7dfda0d Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Sat, 17 May 2014 20:56:38 +0900
Subject: fs: Fix theoretical division by 0 in super_cache_scan().

total_objects could be 0 and is used as a denom.

While total_objects is a "long", total_objects == 0 unlikely happens for
3.12 and later kernels because 32-bit architectures would not be able to
hold (1 << 32) objects. However, total_objects == 0 may happen for kernels
between 3.1 and 3.11 because total_objects in prune_super() was an "int"
and (e.g.) x86_64 architecture might be able to hold (1 << 32) objects.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: stable <stable@kernel.org> # 3.1+
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/super.c b/fs/super.c
index b9a214d..6f8c954 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -80,6 +80,8 @@ static unsigned long super_cache_scan(struct shrinker *shrink,
 	inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid);
 	dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid);
 	total_objects = dentries + inodes + fs_objects + 1;
+	if (!total_objects)
+		total_objects = 1;
 
 	/* proportion the scan between the caches */
 	dentries = mult_frac(sc->nr_to_scan, dentries, total_objects);
-- 
cgit v0.10.2


From c35e02480014f7a86e264a2fda39a568690163da Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@linux.intel.com>
Date: Fri, 1 Aug 2014 09:27:22 -0400
Subject: Add copy_to_iter(), copy_from_iter() and iov_iter_zero()

For DAX, we want to be able to copy between iovecs and kernel addresses
that don't necessarily have a struct page.  This is a fairly simple
rearrangement for bvec iters to kmap the pages outside and pass them in,
but for user iovecs it gets more complicated because we might try various
different ways to kmap the memory.  Duplicating the existing logic works
out best in this case.

We need to be able to write zeroes to an iovec for reads from unwritten
ranges in a file.  This is performed by the new iov_iter_zero() function,
again patterned after the existing code that handles iovec iterators.

[AV: and export the buggers...]

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 290fbf0..9b15814 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -80,6 +80,9 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
+size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i);
+size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
+size_t iov_iter_zero(size_t bytes, struct iov_iter *);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 9a09f20..eafcf60 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -4,6 +4,96 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 
+static size_t copy_to_iter_iovec(void *from, size_t bytes, struct iov_iter *i)
+{
+	size_t skip, copy, left, wanted;
+	const struct iovec *iov;
+	char __user *buf;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	wanted = bytes;
+	iov = i->iov;
+	skip = i->iov_offset;
+	buf = iov->iov_base + skip;
+	copy = min(bytes, iov->iov_len - skip);
+
+	left = __copy_to_user(buf, from, copy);
+	copy -= left;
+	skip += copy;
+	from += copy;
+	bytes -= copy;
+	while (unlikely(!left && bytes)) {
+		iov++;
+		buf = iov->iov_base;
+		copy = min(bytes, iov->iov_len);
+		left = __copy_to_user(buf, from, copy);
+		copy -= left;
+		skip = copy;
+		from += copy;
+		bytes -= copy;
+	}
+
+	if (skip == iov->iov_len) {
+		iov++;
+		skip = 0;
+	}
+	i->count -= wanted - bytes;
+	i->nr_segs -= iov - i->iov;
+	i->iov = iov;
+	i->iov_offset = skip;
+	return wanted - bytes;
+}
+
+static size_t copy_from_iter_iovec(void *to, size_t bytes, struct iov_iter *i)
+{
+	size_t skip, copy, left, wanted;
+	const struct iovec *iov;
+	char __user *buf;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	wanted = bytes;
+	iov = i->iov;
+	skip = i->iov_offset;
+	buf = iov->iov_base + skip;
+	copy = min(bytes, iov->iov_len - skip);
+
+	left = __copy_from_user(to, buf, copy);
+	copy -= left;
+	skip += copy;
+	to += copy;
+	bytes -= copy;
+	while (unlikely(!left && bytes)) {
+		iov++;
+		buf = iov->iov_base;
+		copy = min(bytes, iov->iov_len);
+		left = __copy_from_user(to, buf, copy);
+		copy -= left;
+		skip = copy;
+		to += copy;
+		bytes -= copy;
+	}
+
+	if (skip == iov->iov_len) {
+		iov++;
+		skip = 0;
+	}
+	i->count -= wanted - bytes;
+	i->nr_segs -= iov - i->iov;
+	i->iov = iov;
+	i->iov_offset = skip;
+	return wanted - bytes;
+}
+
 static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
@@ -166,6 +256,50 @@ done:
 	return wanted - bytes;
 }
 
+static size_t zero_iovec(size_t bytes, struct iov_iter *i)
+{
+	size_t skip, copy, left, wanted;
+	const struct iovec *iov;
+	char __user *buf;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	wanted = bytes;
+	iov = i->iov;
+	skip = i->iov_offset;
+	buf = iov->iov_base + skip;
+	copy = min(bytes, iov->iov_len - skip);
+
+	left = __clear_user(buf, copy);
+	copy -= left;
+	skip += copy;
+	bytes -= copy;
+
+	while (unlikely(!left && bytes)) {
+		iov++;
+		buf = iov->iov_base;
+		copy = min(bytes, iov->iov_len);
+		left = __clear_user(buf, copy);
+		copy -= left;
+		skip = copy;
+		bytes -= copy;
+	}
+
+	if (skip == iov->iov_len) {
+		iov++;
+		skip = 0;
+	}
+	i->count -= wanted - bytes;
+	i->nr_segs -= iov - i->iov;
+	i->iov = iov;
+	i->iov_offset = skip;
+	return wanted - bytes;
+}
+
 static size_t __iovec_copy_from_user_inatomic(char *vaddr,
 			const struct iovec *iov, size_t base, size_t bytes)
 {
@@ -414,12 +548,17 @@ static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t
 	kunmap_atomic(to);
 }
 
-static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t bytes,
-			 struct iov_iter *i)
+static void memzero_page(struct page *page, size_t offset, size_t len)
+{
+	char *addr = kmap_atomic(page);
+	memset(addr + offset, 0, len);
+	kunmap_atomic(addr);
+}
+
+static size_t copy_to_iter_bvec(void *from, size_t bytes, struct iov_iter *i)
 {
 	size_t skip, copy, wanted;
 	const struct bio_vec *bvec;
-	void *kaddr, *from;
 
 	if (unlikely(bytes > i->count))
 		bytes = i->count;
@@ -432,8 +571,6 @@ static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t by
 	skip = i->iov_offset;
 	copy = min_t(size_t, bytes, bvec->bv_len - skip);
 
-	kaddr = kmap_atomic(page);
-	from = kaddr + offset;
 	memcpy_to_page(bvec->bv_page, skip + bvec->bv_offset, from, copy);
 	skip += copy;
 	from += copy;
@@ -446,7 +583,6 @@ static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t by
 		from += copy;
 		bytes -= copy;
 	}
-	kunmap_atomic(kaddr);
 	if (skip == bvec->bv_len) {
 		bvec++;
 		skip = 0;
@@ -458,12 +594,10 @@ static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t by
 	return wanted - bytes;
 }
 
-static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t bytes,
-			 struct iov_iter *i)
+static size_t copy_from_iter_bvec(void *to, size_t bytes, struct iov_iter *i)
 {
 	size_t skip, copy, wanted;
 	const struct bio_vec *bvec;
-	void *kaddr, *to;
 
 	if (unlikely(bytes > i->count))
 		bytes = i->count;
@@ -475,10 +609,6 @@ static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t
 	bvec = i->bvec;
 	skip = i->iov_offset;
 
-	kaddr = kmap_atomic(page);
-
-	to = kaddr + offset;
-
 	copy = min(bytes, bvec->bv_len - skip);
 
 	memcpy_from_page(to, bvec->bv_page, bvec->bv_offset + skip, copy);
@@ -495,7 +625,6 @@ static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t
 		to += copy;
 		bytes -= copy;
 	}
-	kunmap_atomic(kaddr);
 	if (skip == bvec->bv_len) {
 		bvec++;
 		skip = 0;
@@ -507,6 +636,61 @@ static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t
 	return wanted;
 }
 
+static size_t copy_page_to_iter_bvec(struct page *page, size_t offset,
+					size_t bytes, struct iov_iter *i)
+{
+	void *kaddr = kmap_atomic(page);
+	size_t wanted = copy_to_iter_bvec(kaddr + offset, bytes, i);
+	kunmap_atomic(kaddr);
+	return wanted;
+}
+
+static size_t copy_page_from_iter_bvec(struct page *page, size_t offset,
+					size_t bytes, struct iov_iter *i)
+{
+	void *kaddr = kmap_atomic(page);
+	size_t wanted = copy_from_iter_bvec(kaddr + offset, bytes, i);
+	kunmap_atomic(kaddr);
+	return wanted;
+}
+
+static size_t zero_bvec(size_t bytes, struct iov_iter *i)
+{
+	size_t skip, copy, wanted;
+	const struct bio_vec *bvec;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	wanted = bytes;
+	bvec = i->bvec;
+	skip = i->iov_offset;
+	copy = min_t(size_t, bytes, bvec->bv_len - skip);
+
+	memzero_page(bvec->bv_page, skip + bvec->bv_offset, copy);
+	skip += copy;
+	bytes -= copy;
+	while (bytes) {
+		bvec++;
+		copy = min(bytes, (size_t)bvec->bv_len);
+		memzero_page(bvec->bv_page, bvec->bv_offset, copy);
+		skip = copy;
+		bytes -= copy;
+	}
+	if (skip == bvec->bv_len) {
+		bvec++;
+		skip = 0;
+	}
+	i->count -= wanted - bytes;
+	i->nr_segs -= bvec - i->bvec;
+	i->bvec = bvec;
+	i->iov_offset = skip;
+	return wanted - bytes;
+}
+
 static size_t copy_from_user_bvec(struct page *page,
 		struct iov_iter *i, unsigned long offset, size_t bytes)
 {
@@ -672,6 +856,34 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 }
 EXPORT_SYMBOL(copy_page_from_iter);
 
+size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i)
+{
+	if (i->type & ITER_BVEC)
+		return copy_to_iter_bvec(addr, bytes, i);
+	else
+		return copy_to_iter_iovec(addr, bytes, i);
+}
+EXPORT_SYMBOL(copy_to_iter);
+
+size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
+{
+	if (i->type & ITER_BVEC)
+		return copy_from_iter_bvec(addr, bytes, i);
+	else
+		return copy_from_iter_iovec(addr, bytes, i);
+}
+EXPORT_SYMBOL(copy_from_iter);
+
+size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
+{
+	if (i->type & ITER_BVEC) {
+		return zero_bvec(bytes, i);
+	} else {
+		return zero_iovec(bytes, i);
+	}
+}
+EXPORT_SYMBOL(iov_iter_zero);
+
 size_t iov_iter_copy_from_user_atomic(struct page *page,
 		struct iov_iter *i, unsigned long offset, size_t bytes)
 {
-- 
cgit v0.10.2


From c2ca0fcd202863b14bd041a7fece2e789926c225 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Sun, 27 Jul 2014 13:00:41 -0400
Subject: fs: make cont_expand_zero interruptible

This patch makes it possible to kill a process looping in
cont_expand_zero. A process may spend a lot of time in this function, so
it is desirable to be able to kill it.

It happened to me that I wanted to copy a piece data from the disk to a
file. By mistake, I used the "seek" parameter to dd instead of "skip". Due
to the "seek" parameter, dd attempted to extend the file and became stuck
doing so - the only possibility was to reset the machine or wait many
hours until the filesystem runs out of space and cont_expand_zero fails.
We need this patch to be able to terminate the process.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/buffer.c b/fs/buffer.c
index 3588a80..c8a1c01 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2318,6 +2318,11 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
 		err = 0;
 
 		balance_dirty_pages_ratelimited(mapping);
+
+		if (unlikely(fatal_signal_pending(current))) {
+			err = -EINTR;
+			goto out;
+		}
 	}
 
 	/* page covers the boundary, find the boundary offset */
-- 
cgit v0.10.2


From 35c265e008b2c4e56765f07d47750d13f95a55c1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 19 Aug 2014 20:25:34 -0400
Subject: cifs: switch to use of %p[dD]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 6cbd9c6..0736406 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -461,8 +461,8 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 
 	xid = get_xid();
 
-	cifs_dbg(FYI, "parent inode = 0x%p name is: %s and dentry = 0x%p\n",
-		 inode, direntry->d_name.name, direntry);
+	cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n",
+		 inode, direntry, direntry);
 
 	tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb));
 	if (IS_ERR(tlink)) {
@@ -540,8 +540,8 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
 	struct cifs_fid fid;
 	__u32 oplock;
 
-	cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p\n",
-		 inode, direntry->d_name.name, direntry);
+	cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %pd and dentry = 0x%p\n",
+		 inode, direntry, direntry);
 
 	tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb));
 	rc = PTR_ERR(tlink);
@@ -713,8 +713,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 
 	xid = get_xid();
 
-	cifs_dbg(FYI, "parent inode = 0x%p name is: %s and dentry = 0x%p\n",
-		 parent_dir_inode, direntry->d_name.name, direntry);
+	cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n",
+		 parent_dir_inode, direntry, direntry);
 
 	/* check whether path exists */
 
@@ -833,7 +833,7 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
 {
 	int rc = 0;
 
-	cifs_dbg(FYI, "In cifs d_delete, name = %s\n", direntry->d_name.name);
+	cifs_dbg(FYI, "In cifs d_delete, name = %pd\n", direntry);
 
 	return rc;
 }     */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 7c018a1..dc3c7e6 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1650,8 +1650,8 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
 
 	cifs_sb = CIFS_SB(dentry->d_sb);
 
-	cifs_dbg(FYI, "write %zd bytes to offset %lld of %s\n",
-		 write_size, *offset, dentry->d_name.name);
+	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
+		 write_size, *offset, dentry);
 
 	tcon = tlink_tcon(open_file->tlink);
 	server = tcon->ses->server;
@@ -2273,8 +2273,8 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
 
 	xid = get_xid();
 
-	cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
-		 file->f_path.dentry->d_name.name, datasync);
+	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
+		 file, datasync);
 
 	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
 		rc = cifs_zap_mapping(inode);
@@ -2315,8 +2315,8 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 
 	xid = get_xid();
 
-	cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
-		 file->f_path.dentry->d_name.name, datasync);
+	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
+		 file, datasync);
 
 	tcon = tlink_tcon(smbfile->tlink);
 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 7899a40..8fd4ee8 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1419,8 +1419,8 @@ cifs_posix_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode,
 	d_instantiate(dentry, newinode);
 
 #ifdef CONFIG_CIFS_DEBUG2
-	cifs_dbg(FYI, "instantiated dentry %p %s to inode %p\n",
-		 dentry, dentry->d_name.name, newinode);
+	cifs_dbg(FYI, "instantiated dentry %p %pd to inode %p\n",
+		 dentry, dentry, newinode);
 
 	if (newinode->i_nlink != 2)
 		cifs_dbg(FYI, "unexpected number of links %d\n",
@@ -2111,8 +2111,8 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
 	struct cifs_unix_set_info_args *args = NULL;
 	struct cifsFileInfo *open_file;
 
-	cifs_dbg(FYI, "setattr_unix on file %s attrs->ia_valid=0x%x\n",
-		 direntry->d_name.name, attrs->ia_valid);
+	cifs_dbg(FYI, "setattr_unix on file %pd attrs->ia_valid=0x%x\n",
+		 direntry, attrs->ia_valid);
 
 	xid = get_xid();
 
@@ -2254,8 +2254,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
 
 	xid = get_xid();
 
-	cifs_dbg(FYI, "setattr on file %s attrs->iavalid 0x%x\n",
-		 direntry->d_name.name, attrs->ia_valid);
+	cifs_dbg(FYI, "setattr on file %pd attrs->iavalid 0x%x\n",
+		 direntry, attrs->ia_valid);
 
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
 		attrs->ia_valid |= ATTR_FORCE;
-- 
cgit v0.10.2


From 4b8e992392a2468f90ae8b5b9f3e95c5f54a61c9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 19 Aug 2014 20:17:38 -0400
Subject: 9p: switch to %p[dD]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index d51ec9f..47db55a 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -65,8 +65,8 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any)
 {
 	struct p9_fid *fid, *ret;
 
-	p9_debug(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n",
-		 dentry->d_name.name, dentry, from_kuid(&init_user_ns, uid),
+	p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p) uid %d any %d\n",
+		 dentry, dentry, from_kuid(&init_user_ns, uid),
 		 any);
 	ret = NULL;
 	/* we'll recheck under lock if there's anything to look in */
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index cc1cfae..eb14e05 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -266,8 +266,8 @@ v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
 	 * Now that we do caching with cache mode enabled, We need
 	 * to support direct IO
 	 */
-	p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) off/no(%lld/%lu) EINVAL\n",
-		 iocb->ki_filp->f_path.dentry->d_name.name,
+	p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%pD) off/no(%lld/%lu) EINVAL\n",
+		 iocb->ki_filp,
 		 (long long)pos, iter->nr_segs);
 
 	return -EINVAL;
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index b03dd23..a345b2d 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -49,8 +49,8 @@
  */
 static int v9fs_cached_dentry_delete(const struct dentry *dentry)
 {
-	p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n",
-		 dentry->d_name.name, dentry);
+	p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p)\n",
+		 dentry, dentry);
 
 	/* Don't cache negative dentries */
 	if (!dentry->d_inode)
@@ -67,8 +67,8 @@ static int v9fs_cached_dentry_delete(const struct dentry *dentry)
 static void v9fs_dentry_release(struct dentry *dentry)
 {
 	struct hlist_node *p, *n;
-	p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n",
-		 dentry->d_name.name, dentry);
+	p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p)\n",
+		 dentry, dentry);
 	hlist_for_each_safe(p, n, (struct hlist_head *)&dentry->d_fsdata)
 		p9_client_clunk(hlist_entry(p, struct p9_fid, dlist));
 	dentry->d_fsdata = NULL;
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 0b3bfa3..4f11510 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -116,7 +116,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
 	int reclen = 0;
 	struct p9_rdir *rdir;
 
-	p9_debug(P9_DEBUG_VFS, "name %s\n", file->f_path.dentry->d_name.name);
+	p9_debug(P9_DEBUG_VFS, "name %pD\n", file);
 	fid = file->private_data;
 
 	buflen = fid->clnt->msize - P9_IOHDRSZ;
@@ -172,7 +172,7 @@ static int v9fs_dir_readdir_dotl(struct file *file, struct dir_context *ctx)
 	struct p9_rdir *rdir;
 	struct p9_dirent curdirent;
 
-	p9_debug(P9_DEBUG_VFS, "name %s\n", file->f_path.dentry->d_name.name);
+	p9_debug(P9_DEBUG_VFS, "name %pD\n", file);
 	fid = file->private_data;
 
 	buflen = fid->clnt->msize - P9_READDIRHDRSZ;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 520c11c..5594505 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -301,8 +301,8 @@ static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl)
 	struct inode *inode = file_inode(filp);
 	int ret = -ENOLCK;
 
-	p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n",
-		 filp, cmd, fl, filp->f_path.dentry->d_name.name);
+	p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %pD\n",
+		 filp, cmd, fl, filp);
 
 	/* No mandatory locks */
 	if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
@@ -337,8 +337,8 @@ static int v9fs_file_flock_dotl(struct file *filp, int cmd,
 	struct inode *inode = file_inode(filp);
 	int ret = -ENOLCK;
 
-	p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n",
-		 filp, cmd, fl, filp->f_path.dentry->d_name.name);
+	p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %pD\n",
+		 filp, cmd, fl, filp);
 
 	/* No mandatory locks */
 	if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 7fa4f7a..296482f 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -648,7 +648,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	struct p9_fid *dfid, *ofid, *fid;
 	struct inode *inode;
 
-	p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
+	p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry);
 
 	err = 0;
 	ofid = NULL;
@@ -755,7 +755,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 	struct p9_fid *fid;
 	struct v9fs_session_info *v9ses;
 
-	p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
+	p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry);
 	err = 0;
 	v9ses = v9fs_inode2v9ses(dir);
 	perm = unixmode2p9mode(v9ses, mode | S_IFDIR);
@@ -791,8 +791,8 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 	struct inode *inode;
 	char *name;
 
-	p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p flags: %x\n",
-		 dir, dentry->d_name.name, dentry, flags);
+	p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%pd) %p flags: %x\n",
+		 dir, dentry, dentry, flags);
 
 	if (dentry->d_name.len > NAME_MAX)
 		return ERR_PTR(-ENAMETOOLONG);
@@ -1239,7 +1239,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
 	struct p9_fid *fid;
 	struct p9_wstat *st;
 
-	p9_debug(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
+	p9_debug(P9_DEBUG_VFS, " %pd\n", dentry);
 	retval = -EPERM;
 	v9ses = v9fs_dentry2v9ses(dentry);
 	fid = v9fs_fid_lookup(dentry);
@@ -1262,8 +1262,8 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
 	retval = min(strlen(st->extension)+1, (size_t)buflen);
 	memcpy(buffer, st->extension, retval);
 
-	p9_debug(P9_DEBUG_VFS, "%s -> %s (%.*s)\n",
-		 dentry->d_name.name, st->extension, buflen, buffer);
+	p9_debug(P9_DEBUG_VFS, "%pd -> %s (%.*s)\n",
+		 dentry, st->extension, buflen, buffer);
 
 done:
 	p9stat_free(st);
@@ -1283,7 +1283,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 	int len = 0;
 	char *link = __getname();
 
-	p9_debug(P9_DEBUG_VFS, "%s\n", dentry->d_name.name);
+	p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
 
 	if (!link)
 		link = ERR_PTR(-ENOMEM);
@@ -1314,8 +1314,8 @@ v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
 {
 	char *s = nd_get_link(nd);
 
-	p9_debug(P9_DEBUG_VFS, " %s %s\n",
-		 dentry->d_name.name, IS_ERR(s) ? "<error>" : s);
+	p9_debug(P9_DEBUG_VFS, " %pd %s\n",
+		 dentry, IS_ERR(s) ? "<error>" : s);
 	if (!IS_ERR(s))
 		__putname(s);
 }
@@ -1364,8 +1364,8 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
 static int
 v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 {
-	p9_debug(P9_DEBUG_VFS, " %lu,%s,%s\n",
-		 dir->i_ino, dentry->d_name.name, symname);
+	p9_debug(P9_DEBUG_VFS, " %lu,%pd,%s\n",
+		 dir->i_ino, dentry, symname);
 
 	return v9fs_vfs_mkspecial(dir, dentry, P9_DMSYMLINK, symname);
 }
@@ -1386,8 +1386,8 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
 	char *name;
 	struct p9_fid *oldfid;
 
-	p9_debug(P9_DEBUG_VFS, " %lu,%s,%s\n",
-		 dir->i_ino, dentry->d_name.name, old_dentry->d_name.name);
+	p9_debug(P9_DEBUG_VFS, " %lu,%pd,%pd\n",
+		 dir->i_ino, dentry, old_dentry);
 
 	oldfid = v9fs_fid_clone(old_dentry);
 	if (IS_ERR(oldfid))
@@ -1428,8 +1428,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rde
 	char *name;
 	u32 perm;
 
-	p9_debug(P9_DEBUG_VFS, " %lu,%s mode: %hx MAJOR: %u MINOR: %u\n",
-		 dir->i_ino, dentry->d_name.name, mode,
+	p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %hx MAJOR: %u MINOR: %u\n",
+		 dir->i_ino, dentry, mode,
 		 MAJOR(rdev), MINOR(rdev));
 
 	if (!new_valid_dev(rdev))
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 1fa85aa..02b64f4 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -393,7 +393,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 	struct dentry *dir_dentry;
 	struct posix_acl *dacl = NULL, *pacl = NULL;
 
-	p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
+	p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry);
 	err = 0;
 	v9ses = v9fs_inode2v9ses(dir);
 
@@ -767,8 +767,8 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
 	struct p9_fid *dfid, *oldfid;
 	struct v9fs_session_info *v9ses;
 
-	p9_debug(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
-		 dir->i_ino, old_dentry->d_name.name, dentry->d_name.name);
+	p9_debug(P9_DEBUG_VFS, "dir ino: %lu, old_name: %pd, new_name: %pd\n",
+		 dir->i_ino, old_dentry, dentry);
 
 	v9ses = v9fs_inode2v9ses(dir);
 	dir_dentry = dentry->d_parent;
@@ -917,7 +917,7 @@ v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
 	char *link = __getname();
 	char *target;
 
-	p9_debug(P9_DEBUG_VFS, "%s\n", dentry->d_name.name);
+	p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
 
 	if (!link) {
 		link = ERR_PTR(-ENOMEM);
-- 
cgit v0.10.2


From 91360b02ab483fc96a70b8c3016838f5d3725f99 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 19 Aug 2014 11:08:22 -0400
Subject: ashmem: use vfs_llseek()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index 713a972..ad4f579 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -339,7 +339,7 @@ static loff_t ashmem_llseek(struct file *file, loff_t offset, int origin)
 		goto out;
 	}
 
-	ret = asma->file->f_op->llseek(asma->file, offset, origin);
+	ret = vfs_llseek(asma->file, offset, origin);
 	if (ret < 0)
 		goto out;
 
-- 
cgit v0.10.2


From 594822918de20bf3a50afbc4de65b6f2971a92db Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 19 Aug 2014 11:28:35 -0400
Subject: vme: don't open-code fixed_size_llseek()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/drivers/staging/vme/devices/vme_user.c b/drivers/staging/vme/devices/vme_user.c
index 920e50a..cdd2ff7 100644
--- a/drivers/staging/vme/devices/vme_user.c
+++ b/drivers/staging/vme/devices/vme_user.c
@@ -410,41 +410,19 @@ static ssize_t vme_user_write(struct file *file, const char __user *buf,
 
 static loff_t vme_user_llseek(struct file *file, loff_t off, int whence)
 {
-	loff_t absolute = -1;
 	unsigned int minor = MINOR(file_inode(file)->i_rdev);
 	size_t image_size;
+	loff_t res;
 
 	if (minor == CONTROL_MINOR)
 		return -EINVAL;
 
 	mutex_lock(&image[minor].mutex);
 	image_size = vme_get_size(image[minor].resource);
-
-	switch (whence) {
-	case SEEK_SET:
-		absolute = off;
-		break;
-	case SEEK_CUR:
-		absolute = file->f_pos + off;
-		break;
-	case SEEK_END:
-		absolute = image_size + off;
-		break;
-	default:
-		mutex_unlock(&image[minor].mutex);
-		return -EINVAL;
-	}
-
-	if ((absolute < 0) || (absolute >= image_size)) {
-		mutex_unlock(&image[minor].mutex);
-		return -EINVAL;
-	}
-
-	file->f_pos = absolute;
-
+	res = fixed_size_llseek(file, off, whence, image_size);
 	mutex_unlock(&image[minor].mutex);
 
-	return absolute;
+	return res;
 }
 
 /*
-- 
cgit v0.10.2


From 2ec3a12a667847d303d4d0c0576d5ff388052b48 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 19 Aug 2014 11:48:09 -0400
Subject: cachefiles_write_page(): switch to __kernel_write()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 25e745b..616db0e7 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -880,7 +880,6 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
 {
 	struct cachefiles_object *object;
 	struct cachefiles_cache *cache;
-	mm_segment_t old_fs;
 	struct file *file;
 	struct path path;
 	loff_t pos, eof;
@@ -914,36 +913,27 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
 	if (IS_ERR(file)) {
 		ret = PTR_ERR(file);
 	} else {
-		ret = -EIO;
-		if (file->f_op->write) {
-			pos = (loff_t) page->index << PAGE_SHIFT;
-
-			/* we mustn't write more data than we have, so we have
-			 * to beware of a partial page at EOF */
-			eof = object->fscache.store_limit_l;
-			len = PAGE_SIZE;
-			if (eof & ~PAGE_MASK) {
-				ASSERTCMP(pos, <, eof);
-				if (eof - pos < PAGE_SIZE) {
-					_debug("cut short %llx to %llx",
-					       pos, eof);
-					len = eof - pos;
-					ASSERTCMP(pos + len, ==, eof);
-				}
+		pos = (loff_t) page->index << PAGE_SHIFT;
+
+		/* we mustn't write more data than we have, so we have
+		 * to beware of a partial page at EOF */
+		eof = object->fscache.store_limit_l;
+		len = PAGE_SIZE;
+		if (eof & ~PAGE_MASK) {
+			ASSERTCMP(pos, <, eof);
+			if (eof - pos < PAGE_SIZE) {
+				_debug("cut short %llx to %llx",
+				       pos, eof);
+				len = eof - pos;
+				ASSERTCMP(pos + len, ==, eof);
 			}
-
-			data = kmap(page);
-			file_start_write(file);
-			old_fs = get_fs();
-			set_fs(KERNEL_DS);
-			ret = file->f_op->write(
-				file, (const void __user *) data, len, &pos);
-			set_fs(old_fs);
-			kunmap(page);
-			file_end_write(file);
-			if (ret != len)
-				ret = -EIO;
 		}
+
+		data = kmap(page);
+		ret = __kernel_write(file, data, len, &pos);
+		kunmap(page);
+		if (ret != len)
+			ret = -EIO;
 		fput(file);
 	}
 
diff --git a/fs/read_write.c b/fs/read_write.c
index 009d854..7d9318c 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -513,6 +513,8 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t
 	return ret;
 }
 
+EXPORT_SYMBOL(__kernel_write);
+
 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
 {
 	ssize_t ret;
-- 
cgit v0.10.2


From 1a37f5ecb707aded73133e14b58102343de56bbd Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 19 Aug 2014 12:06:18 -0400
Subject: carma-fpga: switch to fixed_size_llseek()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/drivers/misc/carma/carma-fpga-program.c b/drivers/misc/carma/carma-fpga-program.c
index 7be8983..f390dee 100644
--- a/drivers/misc/carma/carma-fpga-program.c
+++ b/drivers/misc/carma/carma-fpga-program.c
@@ -767,26 +767,7 @@ static loff_t fpga_llseek(struct file *filp, loff_t offset, int origin)
 	if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
 		return -EINVAL;
 
-	switch (origin) {
-	case SEEK_SET: /* seek relative to the beginning of the file */
-		newpos = offset;
-		break;
-	case SEEK_CUR: /* seek relative to current position in the file */
-		newpos = filp->f_pos + offset;
-		break;
-	case SEEK_END: /* seek relative to the end of the file */
-		newpos = priv->fw_size - offset;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	/* check for sanity */
-	if (newpos > priv->fw_size)
-		return -EINVAL;
-
-	filp->f_pos = newpos;
-	return newpos;
+	return fixed_size_llseek(file, offset, origin, priv->fw_size);
 }
 
 static const struct file_operations fpga_fops = {
-- 
cgit v0.10.2


From d88c242623e90c99864317baae1e192bece2af57 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 19 Aug 2014 12:08:37 -0400
Subject: carma-fpga: switch to simple_read_from_buffer()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/drivers/misc/carma/carma-fpga-program.c b/drivers/misc/carma/carma-fpga-program.c
index f390dee..7e97e53 100644
--- a/drivers/misc/carma/carma-fpga-program.c
+++ b/drivers/misc/carma/carma-fpga-program.c
@@ -749,13 +749,8 @@ static ssize_t fpga_read(struct file *filp, char __user *buf, size_t count,
 			 loff_t *f_pos)
 {
 	struct fpga_dev *priv = filp->private_data;
-
-	count = min_t(size_t, priv->bytes - *f_pos, count);
-	if (copy_to_user(buf, priv->vb.vaddr + *f_pos, count))
-		return -EFAULT;
-
-	*f_pos += count;
-	return count;
+	return simple_read_from_buffer(buf, count, ppos,
+				       priv->vb.vaddr, priv->bytes);
 }
 
 static loff_t fpga_llseek(struct file *filp, loff_t offset, int origin)
-- 
cgit v0.10.2


From 8e3fb059ae7c246ff906c3b988d0de1d66809e84 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 19 Aug 2014 14:42:04 -0400
Subject: rsxx debugfs inanity

check with the author of that horror...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index 820b400..3265ce9 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -62,12 +62,6 @@ static DEFINE_SPINLOCK(rsxx_ida_lock);
 
 /* --------------------Debugfs Setup ------------------- */
 
-struct rsxx_cram {
-	u32 f_pos;
-	u32 offset;
-	void *i_private;
-};
-
 static int rsxx_attr_pci_regs_show(struct seq_file *m, void *p)
 {
 	struct rsxx_cardinfo *card = m->private;
@@ -184,93 +178,50 @@ static int rsxx_attr_pci_regs_open(struct inode *inode, struct file *file)
 static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf,
 			      size_t cnt, loff_t *ppos)
 {
-	struct rsxx_cram *info = fp->private_data;
-	struct rsxx_cardinfo *card = info->i_private;
+	struct rsxx_cardinfo *card = file_inode(fp)->i_private;
 	char *buf;
-	int st;
+	ssize_t st;
 
-	buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL);
+	buf = kzalloc(cnt, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 
-	info->f_pos = (u32)*ppos + info->offset;
-
-	st = rsxx_creg_read(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1);
-	if (st)
-		return st;
-
-	st = copy_to_user(ubuf, buf, cnt);
+	st = rsxx_creg_read(card, CREG_ADD_CRAM + (u32)*ppos, cnt, buf, 1);
+	if (!st)
+		st = copy_to_user(ubuf, buf, cnt);
+	kfree(buf);
 	if (st)
 		return st;
-
-	info->offset += cnt;
-
-	kfree(buf);
-
+	*ppos += cnt;
 	return cnt;
 }
 
 static ssize_t rsxx_cram_write(struct file *fp, const char __user *ubuf,
 			       size_t cnt, loff_t *ppos)
 {
-	struct rsxx_cram *info = fp->private_data;
-	struct rsxx_cardinfo *card = info->i_private;
+	struct rsxx_cardinfo *card = file_inode(fp)->i_private;
 	char *buf;
-	int st;
+	ssize_t st;
 
-	buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL);
+	buf = kzalloc(cnt, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 
 	st = copy_from_user(buf, ubuf, cnt);
+	if (!st)
+		st = rsxx_creg_write(card, CREG_ADD_CRAM + (u32)*ppos, cnt,
+				     buf, 1);
+	kfree(buf);
 	if (st)
 		return st;
-
-	info->f_pos = (u32)*ppos + info->offset;
-
-	st = rsxx_creg_write(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1);
-	if (st)
-		return st;
-
-	info->offset += cnt;
-
-	kfree(buf);
-
+	*ppos += cnt;
 	return cnt;
 }
 
-static int rsxx_cram_open(struct inode *inode, struct file *file)
-{
-	struct rsxx_cram *info = kzalloc(sizeof(*info), GFP_KERNEL);
-	if (!info)
-		return -ENOMEM;
-
-	info->i_private = inode->i_private;
-	info->f_pos = file->f_pos;
-	file->private_data = info;
-
-	return 0;
-}
-
-static int rsxx_cram_release(struct inode *inode, struct file *file)
-{
-	struct rsxx_cram *info = file->private_data;
-
-	if (!info)
-		return 0;
-
-	kfree(info);
-	file->private_data = NULL;
-
-	return 0;
-}
-
 static const struct file_operations debugfs_cram_fops = {
 	.owner		= THIS_MODULE,
-	.open		= rsxx_cram_open,
 	.read		= rsxx_cram_read,
 	.write		= rsxx_cram_write,
-	.release	= rsxx_cram_release,
 };
 
 static const struct file_operations debugfs_stats_fops = {
-- 
cgit v0.10.2


From a1f6dbac629e36f89a1332b5ae773b831c136ee9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 20 Aug 2014 11:05:50 -0400
Subject: dma-buf: don't open-code atomic_long_read()

... not to mention that even atomic_long_read() is too low-level here -
there's file_count().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index f3014c4..5be225c 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -799,7 +799,7 @@ static int dma_buf_describe(struct seq_file *s)
 		seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\n",
 				buf_obj->size,
 				buf_obj->file->f_flags, buf_obj->file->f_mode,
-				(long)(buf_obj->file->f_count.counter),
+				file_count(buf_obj->file),
 				buf_obj->exp_name);
 
 		seq_puts(s, "\tAttached Devices:\n");
-- 
cgit v0.10.2


From 13ba33e89991f6c020a36cfac0001dd54281e67c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 18 Aug 2014 10:04:12 -0400
Subject: switch /dev/zero and /dev/full to ->read_iter()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 917403f..524b707 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -622,53 +622,23 @@ static ssize_t splice_write_null(struct pipe_inode_info *pipe, struct file *out,
 	return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);
 }
 
-static ssize_t read_zero(struct file *file, char __user *buf,
-			 size_t count, loff_t *ppos)
+static ssize_t read_iter_zero(struct kiocb *iocb, struct iov_iter *iter)
 {
-	size_t written;
-
-	if (!count)
-		return 0;
-
-	if (!access_ok(VERIFY_WRITE, buf, count))
-		return -EFAULT;
-
-	written = 0;
-	while (count) {
-		unsigned long unwritten;
-		size_t chunk = count;
+	size_t written = 0;
 
+	while (iov_iter_count(iter)) {
+		size_t chunk = iov_iter_count(iter), n;
 		if (chunk > PAGE_SIZE)
 			chunk = PAGE_SIZE;	/* Just for latency reasons */
-		unwritten = __clear_user(buf, chunk);
-		written += chunk - unwritten;
-		if (unwritten)
-			break;
+		n = iov_iter_zero(chunk, iter);
+		if (!n && iov_iter_count(iter))
+			return written ? written : -EFAULT;
+		written += n;
 		if (signal_pending(current))
 			return written ? written : -ERESTARTSYS;
-		buf += chunk;
-		count -= chunk;
 		cond_resched();
 	}
-	return written ? written : -EFAULT;
-}
-
-static ssize_t aio_read_zero(struct kiocb *iocb, const struct iovec *iov,
-			     unsigned long nr_segs, loff_t pos)
-{
-	size_t written = 0;
-	unsigned long i;
-	ssize_t ret;
-
-	for (i = 0; i < nr_segs; i++) {
-		ret = read_zero(iocb->ki_filp, iov[i].iov_base, iov[i].iov_len,
-				&pos);
-		if (ret < 0)
-			break;
-		written += ret;
-	}
-
-	return written ? written : -EFAULT;
+	return written;
 }
 
 static int mmap_zero(struct file *file, struct vm_area_struct *vma)
@@ -738,7 +708,6 @@ static int open_port(struct inode *inode, struct file *filp)
 #define zero_lseek	null_lseek
 #define full_lseek      null_lseek
 #define write_zero	write_null
-#define read_full       read_zero
 #define aio_write_zero	aio_write_null
 #define open_mem	open_port
 #define open_kmem	open_mem
@@ -783,9 +752,9 @@ static const struct file_operations port_fops = {
 
 static const struct file_operations zero_fops = {
 	.llseek		= zero_lseek,
-	.read		= read_zero,
+	.read		= new_sync_read,
 	.write		= write_zero,
-	.aio_read	= aio_read_zero,
+	.read_iter	= read_iter_zero,
 	.aio_write	= aio_write_zero,
 	.mmap		= mmap_zero,
 };
@@ -802,7 +771,8 @@ static struct backing_dev_info zero_bdi = {
 
 static const struct file_operations full_fops = {
 	.llseek		= full_lseek,
-	.read		= read_full,
+	.read		= new_sync_read,
+	.read_iter	= read_iter_zero,
 	.write		= write_full,
 };
 
-- 
cgit v0.10.2


From 512b2268156a4e15ebf897f9a883bdee153a54b7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 23 Aug 2014 11:28:14 -0400
Subject: switch hci_vhci to ->write_iter()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c
index 5bb5872..6653473 100644
--- a/drivers/bluetooth/hci_vhci.c
+++ b/drivers/bluetooth/hci_vhci.c
@@ -160,13 +160,11 @@ static int vhci_create_device(struct vhci_data *data, __u8 opcode)
 }
 
 static inline ssize_t vhci_get_user(struct vhci_data *data,
-				    const struct iovec *iov,
-				    unsigned long count)
+				    struct iov_iter *from)
 {
-	size_t len = iov_length(iov, count);
+	size_t len = iov_iter_count(from);
 	struct sk_buff *skb;
 	__u8 pkt_type, opcode;
-	unsigned long i;
 	int ret;
 
 	if (len < 2 || len > HCI_MAX_FRAME_SIZE)
@@ -176,12 +174,9 @@ static inline ssize_t vhci_get_user(struct vhci_data *data,
 	if (!skb)
 		return -ENOMEM;
 
-	for (i = 0; i < count; i++) {
-		if (copy_from_user(skb_put(skb, iov[i].iov_len),
-				   iov[i].iov_base, iov[i].iov_len)) {
-			kfree_skb(skb);
-			return -EFAULT;
-		}
+	if (copy_from_iter(skb_put(skb, len), len, from) != len) {
+		kfree_skb(skb);
+		return -EFAULT;
 	}
 
 	pkt_type = *((__u8 *) skb->data);
@@ -294,13 +289,12 @@ static ssize_t vhci_read(struct file *file,
 	return ret;
 }
 
-static ssize_t vhci_write(struct kiocb *iocb, const struct iovec *iov,
-			  unsigned long count, loff_t pos)
+static ssize_t vhci_write(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct vhci_data *data = file->private_data;
 
-	return vhci_get_user(data, iov, count);
+	return vhci_get_user(data, from);
 }
 
 static unsigned int vhci_poll(struct file *file, poll_table *wait)
@@ -365,7 +359,7 @@ static int vhci_release(struct inode *inode, struct file *file)
 static const struct file_operations vhci_fops = {
 	.owner		= THIS_MODULE,
 	.read		= vhci_read,
-	.aio_write	= vhci_write,
+	.write_iter	= vhci_write,
 	.poll		= vhci_poll,
 	.open		= vhci_open,
 	.release	= vhci_release,
-- 
cgit v0.10.2


From cd678fce428018dee0c9345ed63ebf9920d9902f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 23 Aug 2014 12:20:37 -0400
Subject: switch logger to ->write_iter()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/drivers/staging/android/logger.c b/drivers/staging/android/logger.c
index 0bf0d24..28b93d3 100644
--- a/drivers/staging/android/logger.c
+++ b/drivers/staging/android/logger.c
@@ -411,69 +411,18 @@ static void fix_up_readers(struct logger_log *log, size_t len)
 }
 
 /*
- * do_write_log - writes 'len' bytes from 'buf' to 'log'
- *
- * The caller needs to hold log->mutex.
- */
-static void do_write_log(struct logger_log *log, const void *buf, size_t count)
-{
-	size_t len;
-
-	len = min(count, log->size - log->w_off);
-	memcpy(log->buffer + log->w_off, buf, len);
-
-	if (count != len)
-		memcpy(log->buffer, buf + len, count - len);
-
-	log->w_off = logger_offset(log, log->w_off + count);
-
-}
-
-/*
- * do_write_log_user - writes 'len' bytes from the user-space buffer 'buf' to
- * the log 'log'
- *
- * The caller needs to hold log->mutex.
- *
- * Returns 'count' on success, negative error code on failure.
- */
-static ssize_t do_write_log_from_user(struct logger_log *log,
-				      const void __user *buf, size_t count)
-{
-	size_t len;
-
-	len = min(count, log->size - log->w_off);
-	if (len && copy_from_user(log->buffer + log->w_off, buf, len))
-		return -EFAULT;
-
-	if (count != len)
-		if (copy_from_user(log->buffer, buf + len, count - len))
-			/*
-			 * Note that by not updating w_off, this abandons the
-			 * portion of the new entry that *was* successfully
-			 * copied, just above.  This is intentional to avoid
-			 * message corruption from missing fragments.
-			 */
-			return -EFAULT;
-
-	log->w_off = logger_offset(log, log->w_off + count);
-
-	return count;
-}
-
-/*
- * logger_aio_write - our write method, implementing support for write(),
+ * logger_write_iter - our write method, implementing support for write(),
  * writev(), and aio_write(). Writes are our fast path, and we try to optimize
  * them above all else.
  */
-static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov,
-			 unsigned long nr_segs, loff_t ppos)
+static ssize_t logger_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct logger_log *log = file_get_log(iocb->ki_filp);
-	size_t orig;
 	struct logger_entry header;
 	struct timespec now;
-	ssize_t ret = 0;
+	size_t len, count;
+
+	count = min_t(size_t, iocb->ki_nbytes, LOGGER_ENTRY_MAX_PAYLOAD);
 
 	now = current_kernel_time();
 
@@ -482,7 +431,7 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	header.sec = now.tv_sec;
 	header.nsec = now.tv_nsec;
 	header.euid = current_euid();
-	header.len = min_t(size_t, iocb->ki_nbytes, LOGGER_ENTRY_MAX_PAYLOAD);
+	header.len = count;
 	header.hdr_size = sizeof(struct logger_entry);
 
 	/* null writes succeed, return zero */
@@ -491,8 +440,6 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov,
 
 	mutex_lock(&log->mutex);
 
-	orig = log->w_off;
-
 	/*
 	 * Fix up any readers, pulling them forward to the first readable
 	 * entry after (what will be) the new write offset. We do this now
@@ -501,33 +448,35 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	 */
 	fix_up_readers(log, sizeof(struct logger_entry) + header.len);
 
-	do_write_log(log, &header, sizeof(struct logger_entry));
-
-	while (nr_segs-- > 0) {
-		size_t len;
-		ssize_t nr;
+	len = min(sizeof(header), log->size - log->w_off);
+	memcpy(log->buffer + log->w_off, &header, len);
+	memcpy(log->buffer, (char *)&header + len, sizeof(header) - len);
 
-		/* figure out how much of this vector we can keep */
-		len = min_t(size_t, iov->iov_len, header.len - ret);
+	len = min(count, log->size - log->w_off);
 
-		/* write out this segment's payload */
-		nr = do_write_log_from_user(log, iov->iov_base, len);
-		if (unlikely(nr < 0)) {
-			log->w_off = orig;
-			mutex_unlock(&log->mutex);
-			return nr;
-		}
+	if (copy_from_iter(log->buffer + log->w_off, len, from) != len) {
+		/*
+		 * Note that by not updating w_off, this abandons the
+		 * portion of the new entry that *was* successfully
+		 * copied, just above.  This is intentional to avoid
+		 * message corruption from missing fragments.
+		 */
+		mutex_unlock(&log->mutex);
+		return -EFAULT;
+	}
 
-		iov++;
-		ret += nr;
+	if (copy_from_iter(log->buffer, count - len, from) != count - len) {
+		mutex_unlock(&log->mutex);
+		return -EFAULT;
 	}
 
+	log->w_off = logger_offset(log, log->w_off + count);
 	mutex_unlock(&log->mutex);
 
 	/* wake up any blocked readers */
 	wake_up_interruptible(&log->wq);
 
-	return ret;
+	return len;
 }
 
 static struct logger_log *get_log_from_minor(int minor)
@@ -736,7 +685,7 @@ static long logger_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 static const struct file_operations logger_fops = {
 	.owner = THIS_MODULE,
 	.read = logger_read,
-	.aio_write = logger_aio_write,
+	.write_iter = logger_write_iter,
 	.poll = logger_poll,
 	.unlocked_ioctl = logger_ioctl,
 	.compat_ioctl = logger_ioctl,
-- 
cgit v0.10.2


From 849f3127bb46ef75a66dffc1b9b0d3f5f43fa395 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 23 Aug 2014 12:23:53 -0400
Subject: switch /dev/kmsg to ->write_iter()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 1ce7706..7a6e694 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -519,14 +519,13 @@ struct devkmsg_user {
 	char buf[8192];
 };
 
-static ssize_t devkmsg_writev(struct kiocb *iocb, const struct iovec *iv,
-			      unsigned long count, loff_t pos)
+static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
 {
 	char *buf, *line;
 	int i;
 	int level = default_message_loglevel;
 	int facility = 1;	/* LOG_USER */
-	size_t len = iov_length(iv, count);
+	size_t len = iocb->ki_nbytes;
 	ssize_t ret = len;
 
 	if (len > LOG_LINE_MAX)
@@ -535,13 +534,10 @@ static ssize_t devkmsg_writev(struct kiocb *iocb, const struct iovec *iv,
 	if (buf == NULL)
 		return -ENOMEM;
 
-	line = buf;
-	for (i = 0; i < count; i++) {
-		if (copy_from_user(line, iv[i].iov_base, iv[i].iov_len)) {
-			ret = -EFAULT;
-			goto out;
-		}
-		line += iv[i].iov_len;
+	buf[len] = '\0';
+	if (copy_from_iter(buf, len, from) != len) {
+		kfree(buf);
+		return -EFAULT;
 	}
 
 	/*
@@ -567,10 +563,8 @@ static ssize_t devkmsg_writev(struct kiocb *iocb, const struct iovec *iv,
 			line = endp;
 		}
 	}
-	line[len] = '\0';
 
 	printk_emit(facility, level, NULL, 0, "%s", line);
-out:
 	kfree(buf);
 	return ret;
 }
@@ -802,7 +796,7 @@ static int devkmsg_release(struct inode *inode, struct file *file)
 const struct file_operations kmsg_fops = {
 	.open = devkmsg_open,
 	.read = devkmsg_read,
-	.aio_write = devkmsg_writev,
+	.write_iter = devkmsg_write,
 	.llseek = devkmsg_llseek,
 	.poll = devkmsg_poll,
 	.release = devkmsg_release,
-- 
cgit v0.10.2


From 8ba7f6c2faada3ad553518b9febbdce7a988359b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 29 Aug 2014 12:37:29 -0400
Subject: saner perf_atoll()

That loop in there is both anti-idiomatic *and* completely pointless.
strtoll() is there for purpose; use it and compare what's left with
acceptable suffices.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index 2553e5b..d87767f 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -9,78 +9,48 @@
  */
 s64 perf_atoll(const char *str)
 {
-	unsigned int i;
-	s64 length = -1, unit = 1;
+	s64 length;
+	char *p;
+	char c;
 
 	if (!isdigit(str[0]))
 		goto out_err;
 
-	for (i = 1; i < strlen(str); i++) {
-		switch (str[i]) {
-		case 'B':
-		case 'b':
-			break;
-		case 'K':
-			if (str[i + 1] != 'B')
-				goto out_err;
-			else
-				goto kilo;
-		case 'k':
-			if (str[i + 1] != 'b')
-				goto out_err;
-kilo:
-			unit = K;
-			break;
-		case 'M':
-			if (str[i + 1] != 'B')
-				goto out_err;
-			else
-				goto mega;
-		case 'm':
-			if (str[i + 1] != 'b')
+	length = strtoll(str, &p, 10);
+	switch (c = *p++) {
+		case 'b': case 'B':
+			if (*p)
 				goto out_err;
-mega:
-			unit = K * K;
+		case '\0':
+			return length;
+		default:
+			goto out_err;
+		/* two-letter suffices */
+		case 'k': case 'K':
+			length <<= 10;
 			break;
-		case 'G':
-			if (str[i + 1] != 'B')
-				goto out_err;
-			else
-				goto giga;
-		case 'g':
-			if (str[i + 1] != 'b')
-				goto out_err;
-giga:
-			unit = K * K * K;
+		case 'm': case 'M':
+			length <<= 20;
 			break;
-		case 'T':
-			if (str[i + 1] != 'B')
-				goto out_err;
-			else
-				goto tera;
-		case 't':
-			if (str[i + 1] != 'b')
-				goto out_err;
-tera:
-			unit = K * K * K * K;
+		case 'g': case 'G':
+			length <<= 30;
 			break;
-		case '\0':	/* only specified figures */
-			unit = 1;
+		case 't': case 'T':
+			length <<= 40;
 			break;
-		default:
-			if (!isdigit(str[i]))
-				goto out_err;
-			break;
-		}
 	}
-
-	length = atoll(str) * unit;
-	goto out;
+	/* we want the cases to match */
+	if (islower(c)) {
+		if (strcmp(p, "b") != 0)
+			goto out_err;
+	} else {
+		if (strcmp(p, "B") != 0)
+			goto out_err;
+	}
+	return length;
 
 out_err:
-	length = -1;
-out:
-	return length;
+	return -1;
 }
 
 /*
-- 
cgit v0.10.2


From b8850d1fa8e2f6653e57daf6d08e58c5f5eb2c85 Mon Sep 17 00:00:00 2001
From: Tim Gardner <tim.gardner@canonical.com>
Date: Thu, 28 Aug 2014 11:26:03 -0600
Subject: fs: namespace: suppress 'may be used uninitialized' warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The gcc version 4.9.1 compiler complains Even though it isn't possible for
these variables to not get initialized before they are used.

fs/namespace.c: In function ‘SyS_mount’:
fs/namespace.c:2720:8: warning: ‘kernel_dev’ may be used uninitialized in this function [-Wmaybe-uninitialized]
  ret = do_mount(kernel_dev, kernel_dir->name, kernel_type, flags,
        ^
fs/namespace.c:2699:8: note: ‘kernel_dev’ was declared here
  char *kernel_dev;
        ^
fs/namespace.c:2720:8: warning: ‘kernel_type’ may be used uninitialized in this function [-Wmaybe-uninitialized]
  ret = do_mount(kernel_dev, kernel_dir->name, kernel_type, flags,
        ^
fs/namespace.c:2697:8: note: ‘kernel_type’ was declared here
  char *kernel_type;
        ^

Fix the warnings by simplifying copy_mount_string() as suggested by Al Viro.

Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/compat.c b/fs/compat.c
index 66d3d3c..6205c24 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -797,8 +797,9 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name,
 	struct filename *dir;
 	int retval;
 
-	retval = copy_mount_string(type, &kernel_type);
-	if (retval < 0)
+	kernel_type = copy_mount_string(type);
+	retval = PTR_ERR(kernel_type);
+	if (IS_ERR(kernel_type))
 		goto out;
 
 	dir = getname(dir_name);
@@ -806,8 +807,9 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name,
 	if (IS_ERR(dir))
 		goto out1;
 
-	retval = copy_mount_string(dev_name, &kernel_dev);
-	if (retval < 0)
+	kernel_dev = copy_mount_string(dev_name);
+	retval = PTR_ERR(kernel_dev);
+	if (IS_ERR(kernel_dev))
 		goto out2;
 
 	retval = copy_mount_options(data, &data_page);
diff --git a/fs/internal.h b/fs/internal.h
index e325b4f..bd4ac19 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -51,7 +51,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
  * namespace.c
  */
 extern int copy_mount_options(const void __user *, unsigned long *);
-extern int copy_mount_string(const void __user *, char **);
+extern char *copy_mount_string(const void __user *);
 
 extern struct vfsmount *lookup_mnt(struct path *);
 extern int finish_automount(struct vfsmount *, struct path *);
diff --git a/fs/namespace.c b/fs/namespace.c
index 00e5b1e..abd3abb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2514,21 +2514,9 @@ int copy_mount_options(const void __user * data, unsigned long *where)
 	return 0;
 }
 
-int copy_mount_string(const void __user *data, char **where)
+char *copy_mount_string(const void __user *data)
 {
-	char *tmp;
-
-	if (!data) {
-		*where = NULL;
-		return 0;
-	}
-
-	tmp = strndup_user(data, PAGE_SIZE);
-	if (IS_ERR(tmp))
-		return PTR_ERR(tmp);
-
-	*where = tmp;
-	return 0;
+	return data ? strndup_user(data, PAGE_SIZE) : NULL;
 }
 
 /*
@@ -2794,8 +2782,9 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 	char *kernel_dev;
 	unsigned long data_page;
 
-	ret = copy_mount_string(type, &kernel_type);
-	if (ret < 0)
+	kernel_type = copy_mount_string(type);
+	ret = PTR_ERR(kernel_type);
+	if (IS_ERR(kernel_type))
 		goto out_type;
 
 	kernel_dir = getname(dir_name);
@@ -2804,8 +2793,9 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 		goto out_dir;
 	}
 
-	ret = copy_mount_string(dev_name, &kernel_dev);
-	if (ret < 0)
+	kernel_dev = copy_mount_string(dev_name);
+	ret = PTR_ERR(kernel_dev);
+	if (IS_ERR(kernel_dev))
 		goto out_dev;
 
 	ret = copy_mount_options(data, &data_page);
-- 
cgit v0.10.2


From e983094d6dce524f3890edfec44b7ca6dbfa1183 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 31 Aug 2014 14:12:09 -0400
Subject: missing annotation in fs/file.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/file.c b/fs/file.c
index 66923fe..f3b2c20 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -750,6 +750,7 @@ bool get_close_on_exec(unsigned int fd)
 
 static int do_dup2(struct files_struct *files,
 	struct file *file, unsigned fd, unsigned flags)
+__releases(&files->file_lock)
 {
 	struct file *tofree;
 	struct fdtable *fdt;
-- 
cgit v0.10.2


From 3cfb2face6205d30ecfc0145d68cd9e0c3dfe6f1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 31 Aug 2014 15:06:09 -0400
Subject: nouveau: __iomem misannotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/drivers/gpu/drm/nouveau/nouveau_nvif.c b/drivers/gpu/drm/nouveau/nouveau_nvif.c
index 47ca886..6544b84 100644
--- a/drivers/gpu/drm/nouveau/nouveau_nvif.c
+++ b/drivers/gpu/drm/nouveau/nouveau_nvif.c
@@ -40,12 +40,12 @@
 #include "nouveau_usif.h"
 
 static void
-nvkm_client_unmap(void *priv, void *ptr, u32 size)
+nvkm_client_unmap(void *priv, void __iomem *ptr, u32 size)
 {
 	iounmap(ptr);
 }
 
-static void *
+static void __iomem *
 nvkm_client_map(void *priv, u64 handle, u32 size)
 {
 	return ioremap(handle, size);
diff --git a/drivers/gpu/drm/nouveau/nvif/driver.h b/drivers/gpu/drm/nouveau/nvif/driver.h
index b72a8f0..ac4bdb3 100644
--- a/drivers/gpu/drm/nouveau/nvif/driver.h
+++ b/drivers/gpu/drm/nouveau/nvif/driver.h
@@ -9,8 +9,8 @@ struct nvif_driver {
 	int (*suspend)(void *priv);
 	int (*resume)(void *priv);
 	int (*ioctl)(void *priv, bool super, void *data, u32 size, void **hack);
-	void *(*map)(void *priv, u64 handle, u32 size);
-	void (*unmap)(void *priv, void *ptr, u32 size);
+	void __iomem *(*map)(void *priv, u64 handle, u32 size);
+	void (*unmap)(void *priv, void __iomem *ptr, u32 size);
 	bool keep;
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvif/object.h b/drivers/gpu/drm/nouveau/nvif/object.h
index fac3a3b..fe51917 100644
--- a/drivers/gpu/drm/nouveau/nvif/object.h
+++ b/drivers/gpu/drm/nouveau/nvif/object.h
@@ -14,7 +14,7 @@ struct nvif_object {
 	void *priv; /*XXX: hack */
 	void (*dtor)(struct nvif_object *);
 	struct {
-		void *ptr;
+		void __iomem *ptr;
 		u32 size;
 	} map;
 };
@@ -42,7 +42,7 @@ void nvif_object_unmap(struct nvif_object *);
 	struct nvif_object *_object = nvif_object(a);                          \
 	u32 _data;                                                             \
 	if (likely(_object->map.ptr))                                          \
-		_data = ioread##b##_native((u8 *)_object->map.ptr + (c));      \
+		_data = ioread##b##_native((u8 __iomem *)_object->map.ptr + (c));      \
 	else                                                                   \
 		_data = nvif_object_rd(_object, (b) / 8, (c));                 \
 	_data;                                                                 \
@@ -50,7 +50,7 @@ void nvif_object_unmap(struct nvif_object *);
 #define nvif_wr(a,b,c,d) ({                                                    \
 	struct nvif_object *_object = nvif_object(a);                          \
 	if (likely(_object->map.ptr))                                          \
-		iowrite##b##_native((d), (u8 *)_object->map.ptr + (c));        \
+		iowrite##b##_native((d), (u8 __iomem *)_object->map.ptr + (c));        \
 	else                                                                   \
 		nvif_object_wr(_object, (b) / 8, (c), (d));                    \
 })
-- 
cgit v0.10.2


From 765d368217715c794f767d396d8cea16502d823f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 2 Sep 2014 17:29:40 -0400
Subject: android: ->f_op is never NULL

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/drivers/staging/android/ion/compat_ion.c b/drivers/staging/android/ion/compat_ion.c
index ee3a738..a402fda 100644
--- a/drivers/staging/android/ion/compat_ion.c
+++ b/drivers/staging/android/ion/compat_ion.c
@@ -125,7 +125,7 @@ long compat_ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	long ret;
 
-	if (!filp->f_op || !filp->f_op->unlocked_ioctl)
+	if (!filp->f_op->unlocked_ioctl)
 		return -ENOTTY;
 
 	switch (cmd) {
-- 
cgit v0.10.2


From c2e3f5d5f4620bb6568bc559f712ce80222e20cb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 2 Sep 2014 17:31:28 -0400
Subject: ecryptfs: ->f_op is never NULL

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index db0fad3..4ffa35e 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -327,7 +327,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	struct file *lower_file = ecryptfs_file_to_lower(file);
 	long rc = -ENOIOCTLCMD;
 
-	if (lower_file->f_op && lower_file->f_op->compat_ioctl)
+	if (lower_file->f_op->compat_ioctl)
 		rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
 	return rc;
 }
-- 
cgit v0.10.2


From 6b933de642d2726245df98d076cff09bf4d34dde Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 2 Sep 2014 19:42:14 -0400
Subject: [s390] remove pointless assignment of ->f_op in vmlogrdr ->open()

The only way we can get to that function is from misc_open(), after
the latter has set file->f_op to exactly the same value we are
(re)assigning there.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c
index a8848db..9bb48d7 100644
--- a/drivers/s390/char/vmlogrdr.c
+++ b/drivers/s390/char/vmlogrdr.c
@@ -338,7 +338,6 @@ static int vmlogrdr_open (struct inode *inode, struct file *filp)
 
 	/* set the file options */
 	filp->private_data = logptr;
-	filp->f_op = &vmlogrdr_fops;
 
 	/* start recording for this service*/
 	if (logptr->autorecording) {
-- 
cgit v0.10.2


From 9bb8730ed3d2658c6dd49b7f811231e2a0a2e3ed Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 2 Sep 2014 23:53:04 -0400
Subject: jfs: don't hash direct inode

hlist_add_fake(inode->i_hash), same as for the rest of special ones...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index adf8cb0..93e897e 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -550,7 +550,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 	inode->i_ino = 0;
 	inode->i_size = sb->s_bdev->bd_inode->i_size;
 	inode->i_mapping->a_ops = &jfs_metapage_aops;
-	insert_inode_hash(inode);
+	hlist_add_fake(&inode->i_hash);
 	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
 
 	sbi->direct_inode = inode;
-- 
cgit v0.10.2


From 1bb27cacf4992b77556ed4487f99c76c4af3b43d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 3 Sep 2014 13:32:19 -0400
Subject: f_fs: saner API for ffs_sb_create_file()

make it return dentry instead of inode

Acked-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 0dc3552..4726e27 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -164,10 +164,9 @@ struct ffs_desc_helper {
 static int  __must_check ffs_epfiles_create(struct ffs_data *ffs);
 static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count);
 
-static struct inode *__must_check
+static struct dentry *
 ffs_sb_create_file(struct super_block *sb, const char *name, void *data,
-		   const struct file_operations *fops,
-		   struct dentry **dentry_p);
+		   const struct file_operations *fops);
 
 /* Devices management *******************************************************/
 
@@ -1096,10 +1095,9 @@ ffs_sb_make_inode(struct super_block *sb, void *data,
 }
 
 /* Create "regular" file */
-static struct inode *ffs_sb_create_file(struct super_block *sb,
+static struct dentry *ffs_sb_create_file(struct super_block *sb,
 					const char *name, void *data,
-					const struct file_operations *fops,
-					struct dentry **dentry_p)
+					const struct file_operations *fops)
 {
 	struct ffs_data	*ffs = sb->s_fs_info;
 	struct dentry	*dentry;
@@ -1118,10 +1116,7 @@ static struct inode *ffs_sb_create_file(struct super_block *sb,
 	}
 
 	d_add(dentry, inode);
-	if (dentry_p)
-		*dentry_p = dentry;
-
-	return inode;
+	return dentry;
 }
 
 /* Super block */
@@ -1166,7 +1161,7 @@ static int ffs_sb_fill(struct super_block *sb, void *_data, int silent)
 
 	/* EP0 file */
 	if (unlikely(!ffs_sb_create_file(sb, "ep0", ffs,
-					 &ffs_ep0_operations, NULL)))
+					 &ffs_ep0_operations)))
 		return -ENOMEM;
 
 	return 0;
@@ -1535,9 +1530,10 @@ static int ffs_epfiles_create(struct ffs_data *ffs)
 		mutex_init(&epfile->mutex);
 		init_waitqueue_head(&epfile->wait);
 		sprintf(epfiles->name, "ep%u",  i);
-		if (!unlikely(ffs_sb_create_file(ffs->sb, epfiles->name, epfile,
-						 &ffs_epfile_operations,
-						 &epfile->dentry))) {
+		epfile->dentry = ffs_sb_create_file(ffs->sb, epfiles->name,
+						 epfile,
+						 &ffs_epfile_operations);
+		if (unlikely(!epfile->dentry)) {
 			ffs_epfiles_destroy(epfiles, i - 1);
 			return -ENOMEM;
 		}
-- 
cgit v0.10.2


From fb6c3225b49d389ff4a9a454fc8c3f3642632744 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 3 Sep 2014 13:37:56 -0400
Subject: gadgetfs: saner API for gadgetfs_create_file()

return dentry, not inode.  dev->inode is never used by anything,
don't bother with storing it.

Acked-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index e96077b..69202cd 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -198,7 +198,6 @@ struct ep_data {
 	struct list_head		epfiles;
 	wait_queue_head_t		wait;
 	struct dentry			*dentry;
-	struct inode			*inode;
 };
 
 static inline void get_ep (struct ep_data *data)
@@ -1618,10 +1617,9 @@ static void destroy_ep_files (struct dev_data *dev)
 }
 
 
-static struct inode *
+static struct dentry *
 gadgetfs_create_file (struct super_block *sb, char const *name,
-		void *data, const struct file_operations *fops,
-		struct dentry **dentry_p);
+		void *data, const struct file_operations *fops);
 
 static int activate_ep_files (struct dev_data *dev)
 {
@@ -1649,10 +1647,9 @@ static int activate_ep_files (struct dev_data *dev)
 		if (!data->req)
 			goto enomem1;
 
-		data->inode = gadgetfs_create_file (dev->sb, data->name,
-				data, &ep_config_operations,
-				&data->dentry);
-		if (!data->inode)
+		data->dentry = gadgetfs_create_file (dev->sb, data->name,
+				data, &ep_config_operations);
+		if (!data->dentry)
 			goto enomem2;
 		list_add_tail (&data->epfiles, &dev->epfiles);
 	}
@@ -2011,10 +2008,9 @@ gadgetfs_make_inode (struct super_block *sb,
 /* creates in fs root directory, so non-renamable and non-linkable.
  * so inode and dentry are paired, until device reconfig.
  */
-static struct inode *
+static struct dentry *
 gadgetfs_create_file (struct super_block *sb, char const *name,
-		void *data, const struct file_operations *fops,
-		struct dentry **dentry_p)
+		void *data, const struct file_operations *fops)
 {
 	struct dentry	*dentry;
 	struct inode	*inode;
@@ -2030,8 +2026,7 @@ gadgetfs_create_file (struct super_block *sb, char const *name,
 		return NULL;
 	}
 	d_add (dentry, inode);
-	*dentry_p = dentry;
-	return inode;
+	return dentry;
 }
 
 static const struct super_operations gadget_fs_operations = {
@@ -2079,9 +2074,8 @@ gadgetfs_fill_super (struct super_block *sb, void *opts, int silent)
 		goto Enomem;
 
 	dev->sb = sb;
-	if (!gadgetfs_create_file (sb, CHIP,
-				dev, &dev_init_operations,
-				&dev->dentry)) {
+	dev->dentry = gadgetfs_create_file(sb, CHIP, dev, &dev_init_operations);
+	if (!dev->dentry) {
 		put_dev(dev);
 		goto Enomem;
 	}
-- 
cgit v0.10.2


From 18c85d09dcd14342cd4891d016f4086095160573 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 4 Sep 2014 19:29:32 -0400
Subject: [infiniband] remove pointless assignments

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index e0c404b..4977082 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -82,7 +82,6 @@ static int create_file(const char *name, umode_t mode,
 {
 	int error;
 
-	*dentry = NULL;
 	mutex_lock(&parent->d_inode->i_mutex);
 	*dentry = lookup_one_len(name, parent, strlen(name));
 	if (!IS_ERR(*dentry))
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index cab610c..8185458 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -89,7 +89,6 @@ static int create_file(const char *name, umode_t mode,
 {
 	int error;
 
-	*dentry = NULL;
 	mutex_lock(&parent->d_inode->i_mutex);
 	*dentry = lookup_one_len(name, parent, strlen(name));
 	if (!IS_ERR(*dentry))
-- 
cgit v0.10.2


From 4d93bc3e81736ce55c79d9cae743bab4f89b4f9c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 12 Sep 2014 18:21:05 -0400
Subject: gfs2_atomic_open(): skip lookups on hashed dentry

hashed dentry can be passed to ->atomic_open() only if
a) it has just passed revalidation and
b) it's negative

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index fc8ac2e..8108b4f 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1244,6 +1244,9 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
 	struct dentry *d;
 	bool excl = !!(flags & O_EXCL);
 
+	if (!d_unhashed(dentry))
+		goto skip_lookup;
+
 	d = __gfs2_lookup(dir, dentry, file, opened);
 	if (IS_ERR(d))
 		return PTR_ERR(d);
@@ -1260,6 +1263,8 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
 	}
 
 	BUG_ON(d != NULL);
+
+skip_lookup:
 	if (!(flags & O_CREAT))
 		return -ENOENT;
 
-- 
cgit v0.10.2


From 5e6123f3477e4260fb14392f0a88f1a842fa4d42 Mon Sep 17 00:00:00 2001
From: Seunghun Lee <waydi1@gmail.com>
Date: Sun, 14 Sep 2014 22:15:10 +0900
Subject: vfs: move getname() from callers to do_mount()

It would make more sense to pass char __user * instead of
char * in callers of do_mount() and do getname() inside do_mount().

Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Seunghun Lee <waydi1@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 1402fcc..f9c732e 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -446,7 +446,8 @@ struct procfs_args {
  * unhappy with OSF UFS. [CHECKME]
  */
 static int
-osf_ufs_mount(const char *dirname, struct ufs_args __user *args, int flags)
+osf_ufs_mount(const char __user *dirname,
+	      struct ufs_args __user *args, int flags)
 {
 	int retval;
 	struct cdfs_args tmp;
@@ -466,7 +467,8 @@ osf_ufs_mount(const char *dirname, struct ufs_args __user *args, int flags)
 }
 
 static int
-osf_cdfs_mount(const char *dirname, struct cdfs_args __user *args, int flags)
+osf_cdfs_mount(const char __user *dirname,
+	       struct cdfs_args __user *args, int flags)
 {
 	int retval;
 	struct cdfs_args tmp;
@@ -486,7 +488,8 @@ osf_cdfs_mount(const char *dirname, struct cdfs_args __user *args, int flags)
 }
 
 static int
-osf_procfs_mount(const char *dirname, struct procfs_args __user *args, int flags)
+osf_procfs_mount(const char __user *dirname,
+		 struct procfs_args __user *args, int flags)
 {
 	struct procfs_args tmp;
 
@@ -500,28 +503,22 @@ SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, const char __user *, path,
 		int, flag, void __user *, data)
 {
 	int retval;
-	struct filename *name;
 
-	name = getname(path);
-	retval = PTR_ERR(name);
-	if (IS_ERR(name))
-		goto out;
 	switch (typenr) {
 	case 1:
-		retval = osf_ufs_mount(name->name, data, flag);
+		retval = osf_ufs_mount(path, data, flag);
 		break;
 	case 6:
-		retval = osf_cdfs_mount(name->name, data, flag);
+		retval = osf_cdfs_mount(path, data, flag);
 		break;
 	case 9:
-		retval = osf_procfs_mount(name->name, data, flag);
+		retval = osf_procfs_mount(path, data, flag);
 		break;
 	default:
 		retval = -EINVAL;
 		printk("osf_mount(%ld, %x)\n", typenr, flag);
 	}
-	putname(name);
- out:
+
 	return retval;
 }
 
diff --git a/fs/compat.c b/fs/compat.c
index 6205c24..b13df99 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -794,7 +794,6 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name,
 	char *kernel_type;
 	unsigned long data_page;
 	char *kernel_dev;
-	struct filename *dir;
 	int retval;
 
 	kernel_type = copy_mount_string(type);
@@ -802,19 +801,14 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name,
 	if (IS_ERR(kernel_type))
 		goto out;
 
-	dir = getname(dir_name);
-	retval = PTR_ERR(dir);
-	if (IS_ERR(dir))
-		goto out1;
-
 	kernel_dev = copy_mount_string(dev_name);
 	retval = PTR_ERR(kernel_dev);
 	if (IS_ERR(kernel_dev))
-		goto out2;
+		goto out1;
 
 	retval = copy_mount_options(data, &data_page);
 	if (retval < 0)
-		goto out3;
+		goto out2;
 
 	retval = -EINVAL;
 
@@ -823,19 +817,17 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name,
 			do_ncp_super_data_conv((void *)data_page);
 		} else if (!strcmp(kernel_type, NFS4_NAME)) {
 			if (do_nfs4_super_data_conv((void *) data_page))
-				goto out4;
+				goto out3;
 		}
 	}
 
-	retval = do_mount(kernel_dev, dir->name, kernel_type,
+	retval = do_mount(kernel_dev, dir_name, kernel_type,
 			flags, (void*)data_page);
 
- out4:
-	free_page(data_page);
  out3:
-	kfree(kernel_dev);
+	free_page(data_page);
  out2:
-	putname(dir);
+	kfree(kernel_dev);
  out1:
 	kfree(kernel_type);
  out:
diff --git a/fs/namespace.c b/fs/namespace.c
index abd3abb..348562f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2533,7 +2533,7 @@ char *copy_mount_string(const void __user *data)
  * Therefore, if this magic number is present, it carries no information
  * and must be discarded.
  */
-long do_mount(const char *dev_name, const char *dir_name,
+long do_mount(const char *dev_name, const char __user *dir_name,
 		const char *type_page, unsigned long flags, void *data_page)
 {
 	struct path path;
@@ -2545,15 +2545,11 @@ long do_mount(const char *dev_name, const char *dir_name,
 		flags &= ~MS_MGC_MSK;
 
 	/* Basic sanity checks */
-
-	if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
-		return -EINVAL;
-
 	if (data_page)
 		((char *)data_page)[PAGE_SIZE - 1] = 0;
 
 	/* ... and get the mountpoint */
-	retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
+	retval = user_path(dir_name, &path);
 	if (retval)
 		return retval;
 
@@ -2778,7 +2774,6 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 {
 	int ret;
 	char *kernel_type;
-	struct filename *kernel_dir;
 	char *kernel_dev;
 	unsigned long data_page;
 
@@ -2787,12 +2782,6 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 	if (IS_ERR(kernel_type))
 		goto out_type;
 
-	kernel_dir = getname(dir_name);
-	if (IS_ERR(kernel_dir)) {
-		ret = PTR_ERR(kernel_dir);
-		goto out_dir;
-	}
-
 	kernel_dev = copy_mount_string(dev_name);
 	ret = PTR_ERR(kernel_dev);
 	if (IS_ERR(kernel_dev))
@@ -2802,15 +2791,13 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 	if (ret < 0)
 		goto out_data;
 
-	ret = do_mount(kernel_dev, kernel_dir->name, kernel_type, flags,
+	ret = do_mount(kernel_dev, dir_name, kernel_type, flags,
 		(void *) data_page);
 
 	free_page(data_page);
 out_data:
 	kfree(kernel_dev);
 out_dev:
-	putname(kernel_dir);
-out_dir:
 	kfree(kernel_type);
 out_type:
 	return ret;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 75bdd51..9214836 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1855,7 +1855,8 @@ extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
 extern void kern_unmount(struct vfsmount *mnt);
 extern int may_umount_tree(struct vfsmount *);
 extern int may_umount(struct vfsmount *);
-extern long do_mount(const char *, const char *, const char *, unsigned long, void *);
+extern long do_mount(const char *, const char __user *,
+		     const char *, unsigned long, void *);
 extern struct vfsmount *collect_mounts(struct path *);
 extern void drop_collected_mounts(struct vfsmount *);
 extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
-- 
cgit v0.10.2


From 821cc3070ff54e39ab6624c843f1905d737d9ac0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 25 Sep 2014 23:57:58 -0400
Subject: ncpfs: use list_for_each_entry() for d_subdirs walk

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 08b8ea8..314e7ad 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -388,7 +388,6 @@ static struct dentry *
 ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
 {
 	struct dentry *dent = dentry;
-	struct list_head *next;
 
 	if (d_validate(dent, parent)) {
 		if (dent->d_name.len <= NCP_MAXPATHLEN &&
@@ -404,9 +403,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
 
 	/* If a pointer is invalid, we search the dentry. */
 	spin_lock(&parent->d_lock);
-	next = parent->d_subdirs.next;
-	while (next != &parent->d_subdirs) {
-		dent = list_entry(next, struct dentry, d_u.d_child);
+	list_for_each_entry(dent, &parent->d_subdirs, d_u.d_child) {
 		if ((unsigned long)dent->d_fsdata == fpos) {
 			if (dent->d_inode)
 				dget(dent);
@@ -415,7 +412,6 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
 			spin_unlock(&parent->d_lock);
 			goto out;
 		}
-		next = next->next;
 	}
 	spin_unlock(&parent->d_lock);
 	return NULL;
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 32c0658..52cb19d 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -188,20 +188,14 @@ static inline void
 ncp_renew_dentries(struct dentry *parent)
 {
 	struct ncp_server *server = NCP_SERVER(parent->d_inode);
-	struct list_head *next;
 	struct dentry *dentry;
 
 	spin_lock(&parent->d_lock);
-	next = parent->d_subdirs.next;
-	while (next != &parent->d_subdirs) {
-		dentry = list_entry(next, struct dentry, d_u.d_child);
-
+	list_for_each_entry(dentry, &parent->d_subdirs, d_u.d_child) {
 		if (dentry->d_fsdata == NULL)
 			ncp_age_dentry(server, dentry);
 		else
 			ncp_new_dentry(dentry);
-
-		next = next->next;
 	}
 	spin_unlock(&parent->d_lock);
 }
@@ -210,16 +204,12 @@ static inline void
 ncp_invalidate_dircache_entries(struct dentry *parent)
 {
 	struct ncp_server *server = NCP_SERVER(parent->d_inode);
-	struct list_head *next;
 	struct dentry *dentry;
 
 	spin_lock(&parent->d_lock);
-	next = parent->d_subdirs.next;
-	while (next != &parent->d_subdirs) {
-		dentry = list_entry(next, struct dentry, d_u.d_child);
+	list_for_each_entry(dentry, &parent->d_subdirs, d_u.d_child) {
 		dentry->d_fsdata = NULL;
 		ncp_age_dentry(server, dentry);
-		next = next->next;
 	}
 	spin_unlock(&parent->d_lock);
 }
-- 
cgit v0.10.2


From 24dff96a37a2ca319e75a74d3929b2de22447ca6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 8 Oct 2014 23:44:00 -0400
Subject: fix misuses of f_count() in ppp and netlink

we used to check for "nobody else could start doing anything with
that opened file" by checking that refcount was 2 or less - one
for descriptor table and one we'd acquired in fget() on the way to
wherever we are.  That was race-prone (somebody else might have
had a reference to descriptor table and do fget() just as we'd
been checking) and it had become flat-out incorrect back when
we switched to fget_light() on those codepaths - unlike fget(),
it doesn't grab an extra reference unless the descriptor table
is shared.  The same change allowed a race-free check, though -
we are safe exactly when refcount is less than 2.

It was a long time ago; pre-2.6.12 for ioctl() (the codepath leading
to ppp one) and 2.6.17 for sendmsg() (netlink one).  OTOH,
netlink hadn't grown that check until 3.9 and ppp used to live
in drivers/net, not drivers/net/ppp until 3.1.  The bug existed
well before that, though, and the same fix used to apply in old
location of file.

Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index fa0d717..90c639b 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -594,7 +594,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 			if (file == ppp->owner)
 				ppp_shutdown_interface(ppp);
 		}
-		if (atomic_long_read(&file->f_count) <= 2) {
+		if (atomic_long_read(&file->f_count) < 2) {
 			ppp_release(NULL, file);
 			err = 0;
 		} else
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c416725..7a186e7 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -715,7 +715,7 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
 	 * after validation, the socket and the ring may only be used by a
 	 * single process, otherwise we fall back to copying.
 	 */
-	if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 ||
+	if (atomic_long_read(&sk->sk_socket->file->f_count) > 1 ||
 	    atomic_read(&nlk->mapped) > 1)
 		excl = false;
 
-- 
cgit v0.10.2


From 115cbfdc609702a131c51281864c08f5d27b459a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 11 Oct 2014 23:05:52 -0400
Subject: let path_init() failures treated the same way as subsequent
 link_path_walk()

As it is, path_lookupat() and path_mounpoint() might end up leaking struct file
reference in some cases.

Spotted-by: Eric Biggers <ebiggers3@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namei.c b/fs/namei.c
index d20d579..0f64aa4 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1950,7 +1950,7 @@ static int path_lookupat(int dfd, const char *name,
 	err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
 
 	if (unlikely(err))
-		return err;
+		goto out;
 
 	current->total_link_count = 0;
 	err = link_path_walk(name, nd);
@@ -1982,6 +1982,7 @@ static int path_lookupat(int dfd, const char *name,
 		}
 	}
 
+out:
 	if (base)
 		fput(base);
 
@@ -2301,7 +2302,7 @@ path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags
 
 	err = path_init(dfd, name, flags | LOOKUP_PARENT, &nd, &base);
 	if (unlikely(err))
-		return err;
+		goto out;
 
 	current->total_link_count = 0;
 	err = link_path_walk(name, &nd);
-- 
cgit v0.10.2


From 810bb172671aec17cf85cc748120cf73c17af372 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 12 Oct 2014 12:45:37 -0400
Subject: take dname_external() into fs/dcache.c

never used outside and it's too low-level for legitimate uses outside
of fs/dcache.c anyway

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/dcache.c b/fs/dcache.c
index 8221faae..d5a23fd 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -264,6 +264,11 @@ static void __d_free_external(struct rcu_head *head)
 	kmem_cache_free(dentry_cache, dentry); 
 }
 
+static inline int dname_external(const struct dentry *dentry)
+{
+	return dentry->d_name.name != dentry->d_iname;
+}
+
 static void dentry_free(struct dentry *dentry)
 {
 	if (unlikely(dname_external(dentry))) {
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 81b0315..27a7f00 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -226,11 +226,6 @@ struct dentry_operations {
 
 extern seqlock_t rename_lock;
 
-static inline int dname_external(const struct dentry *dentry)
-{
-	return dentry->d_name.name != dentry->d_iname;
-}
-
 /*
  * These are the low-level FS interfaces to the dcache..
  */
-- 
cgit v0.10.2


From 7b600f2abb36909e70963cc7c744c15983500bee Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 12 Oct 2014 13:31:58 -0400
Subject: don't need that forward declaration of struct nameidata in dcache.h
 anymore

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 27a7f00..b2a2a08 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -11,7 +11,6 @@
 #include <linux/rcupdate.h>
 #include <linux/lockref.h>
 
-struct nameidata;
 struct path;
 struct vfsmount;
 
-- 
cgit v0.10.2


From 50b220bbe7092bbfe4406adfe3a216337a64655d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 12 Oct 2014 14:21:38 -0400
Subject: reiserfs: remove pointless forward declaration of struct nameidata

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h
index 857ec7e..f620e96 100644
--- a/fs/reiserfs/xattr.h
+++ b/fs/reiserfs/xattr.h
@@ -7,7 +7,6 @@ struct inode;
 struct dentry;
 struct iattr;
 struct super_block;
-struct nameidata;
 
 int reiserfs_xattr_register_handlers(void) __init;
 void reiserfs_xattr_unregister_handlers(void);
-- 
cgit v0.10.2


From 8cc431165d8fbda43634dd15ab17f76a151c39a8 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers3@gmail.com>
Date: Sun, 12 Oct 2014 11:59:58 -0500
Subject: vfs: Deduplicate code shared by xattr system calls operating on paths

The following pairs of system calls dealing with extended attributes only
differ in their behavior on whether the symbolic link is followed (when
the named file is a symbolic link):

- setxattr() and lsetxattr()
- getxattr() and lgetxattr()
- listxattr() and llistxattr()
- removexattr() and lremovexattr()

Despite this, the implementations all had duplicated code, so this commit
redirects each of the above pairs of system calls to a corresponding
function to which different lookup flags (LOOKUP_FOLLOW or 0) are passed.

For me this reduced the stripped size of xattr.o from 8824 to 8248 bytes.

Signed-off-by: Eric Biggers <ebiggers3@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/xattr.c b/fs/xattr.c
index c69e6d4..64e83ef 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -364,13 +364,12 @@ out:
 	return error;
 }
 
-SYSCALL_DEFINE5(setxattr, const char __user *, pathname,
-		const char __user *, name, const void __user *, value,
-		size_t, size, int, flags)
+static int path_setxattr(const char __user *pathname,
+			 const char __user *name, const void __user *value,
+			 size_t size, int flags, unsigned int lookup_flags)
 {
 	struct path path;
 	int error;
-	unsigned int lookup_flags = LOOKUP_FOLLOW;
 retry:
 	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
 	if (error)
@@ -388,28 +387,18 @@ retry:
 	return error;
 }
 
+SYSCALL_DEFINE5(setxattr, const char __user *, pathname,
+		const char __user *, name, const void __user *, value,
+		size_t, size, int, flags)
+{
+	return path_setxattr(pathname, name, value, size, flags, LOOKUP_FOLLOW);
+}
+
 SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname,
 		const char __user *, name, const void __user *, value,
 		size_t, size, int, flags)
 {
-	struct path path;
-	int error;
-	unsigned int lookup_flags = 0;
-retry:
-	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
-	if (error)
-		return error;
-	error = mnt_want_write(path.mnt);
-	if (!error) {
-		error = setxattr(path.dentry, name, value, size, flags);
-		mnt_drop_write(path.mnt);
-	}
-	path_put(&path);
-	if (retry_estale(error, lookup_flags)) {
-		lookup_flags |= LOOKUP_REVAL;
-		goto retry;
-	}
-	return error;
+	return path_setxattr(pathname, name, value, size, flags, 0);
 }
 
 SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
@@ -481,12 +470,12 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
 	return error;
 }
 
-SYSCALL_DEFINE4(getxattr, const char __user *, pathname,
-		const char __user *, name, void __user *, value, size_t, size)
+static ssize_t path_getxattr(const char __user *pathname,
+			     const char __user *name, void __user *value,
+			     size_t size, unsigned int lookup_flags)
 {
 	struct path path;
 	ssize_t error;
-	unsigned int lookup_flags = LOOKUP_FOLLOW;
 retry:
 	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
 	if (error)
@@ -500,23 +489,16 @@ retry:
 	return error;
 }
 
+SYSCALL_DEFINE4(getxattr, const char __user *, pathname,
+		const char __user *, name, void __user *, value, size_t, size)
+{
+	return path_getxattr(pathname, name, value, size, LOOKUP_FOLLOW);
+}
+
 SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname,
 		const char __user *, name, void __user *, value, size_t, size)
 {
-	struct path path;
-	ssize_t error;
-	unsigned int lookup_flags = 0;
-retry:
-	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
-	if (error)
-		return error;
-	error = getxattr(path.dentry, name, value, size);
-	path_put(&path);
-	if (retry_estale(error, lookup_flags)) {
-		lookup_flags |= LOOKUP_REVAL;
-		goto retry;
-	}
-	return error;
+	return path_getxattr(pathname, name, value, size, 0);
 }
 
 SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name,
@@ -571,12 +553,11 @@ listxattr(struct dentry *d, char __user *list, size_t size)
 	return error;
 }
 
-SYSCALL_DEFINE3(listxattr, const char __user *, pathname, char __user *, list,
-		size_t, size)
+static ssize_t path_listxattr(const char __user *pathname, char __user *list,
+			      size_t size, unsigned int lookup_flags)
 {
 	struct path path;
 	ssize_t error;
-	unsigned int lookup_flags = LOOKUP_FOLLOW;
 retry:
 	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
 	if (error)
@@ -590,23 +571,16 @@ retry:
 	return error;
 }
 
+SYSCALL_DEFINE3(listxattr, const char __user *, pathname, char __user *, list,
+		size_t, size)
+{
+	return path_listxattr(pathname, list, size, LOOKUP_FOLLOW);
+}
+
 SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list,
 		size_t, size)
 {
-	struct path path;
-	ssize_t error;
-	unsigned int lookup_flags = 0;
-retry:
-	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
-	if (error)
-		return error;
-	error = listxattr(path.dentry, list, size);
-	path_put(&path);
-	if (retry_estale(error, lookup_flags)) {
-		lookup_flags |= LOOKUP_REVAL;
-		goto retry;
-	}
-	return error;
+	return path_listxattr(pathname, list, size, 0);
 }
 
 SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
@@ -640,12 +614,11 @@ removexattr(struct dentry *d, const char __user *name)
 	return vfs_removexattr(d, kname);
 }
 
-SYSCALL_DEFINE2(removexattr, const char __user *, pathname,
-		const char __user *, name)
+static int path_removexattr(const char __user *pathname,
+			    const char __user *name, unsigned int lookup_flags)
 {
 	struct path path;
 	int error;
-	unsigned int lookup_flags = LOOKUP_FOLLOW;
 retry:
 	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
 	if (error)
@@ -663,27 +636,16 @@ retry:
 	return error;
 }
 
+SYSCALL_DEFINE2(removexattr, const char __user *, pathname,
+		const char __user *, name)
+{
+	return path_removexattr(pathname, name, LOOKUP_FOLLOW);
+}
+
 SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname,
 		const char __user *, name)
 {
-	struct path path;
-	int error;
-	unsigned int lookup_flags = 0;
-retry:
-	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
-	if (error)
-		return error;
-	error = mnt_want_write(path.mnt);
-	if (!error) {
-		error = removexattr(path.dentry, name);
-		mnt_drop_write(path.mnt);
-	}
-	path_put(&path);
-	if (retry_estale(error, lookup_flags)) {
-		lookup_flags |= LOOKUP_REVAL;
-		goto retry;
-	}
-	return error;
+	return path_removexattr(pathname, name, 0);
 }
 
 SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
-- 
cgit v0.10.2


From a457606a6f81cfddfc9da1ef2a8bf2c65a8eb35e Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers3@gmail.com>
Date: Sun, 12 Oct 2014 14:29:29 -0500
Subject: fs/file_table.c: Update alloc_file() comment

This comment is 5 years outdated; init_file() no longer exists.

Signed-off-by: Eric Biggers <ebiggers3@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/file_table.c b/fs/file_table.c
index 385bfd3..a843623 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -150,18 +150,10 @@ over:
 
 /**
  * alloc_file - allocate and initialize a 'struct file'
- * @mnt: the vfsmount on which the file will reside
- * @dentry: the dentry representing the new file
+ *
+ * @path: the (dentry, vfsmount) pair for the new file
  * @mode: the mode with which the new file will be opened
  * @fop: the 'struct file_operations' for the new file
- *
- * Use this instead of get_empty_filp() to get a new
- * 'struct file'.  Do so because of the same initialization
- * pitfalls reasons listed for init_file().  This is a
- * preferred interface to using init_file().
- *
- * If all the callers of init_file() are eliminated, its
- * code should be moved into this function.
  */
 struct file *alloc_file(struct path *path, fmode_t mode,
 		const struct file_operations *fop)
-- 
cgit v0.10.2