From 6d13f69444bd3d4888e43f7756449748f5a98bad Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 29 Sep 2014 14:46:30 -0400 Subject: missing data dependency barrier in prepend_name() AFAICS, prepend_name() is broken on SMP alpha. Disclaimer: I don't have SMP alpha boxen to reproduce it on. However, it really looks like the race is real. CPU1: d_path() on /mnt/ramfs/<255-character>/foo CPU2: mv /mnt/ramfs/<255-character> /mnt/ramfs/<63-character> CPU2 does d_alloc(), which allocates an external name, stores the name there including terminating NUL, does smp_wmb() and stores its address in dentry->d_name.name. It proceeds to d_add(dentry, NULL) and d_move() old dentry over to that. ->d_name.name value ends up in that dentry. In the meanwhile, CPU1 gets to prepend_name() for that dentry. It fetches ->d_name.name and ->d_name.len; the former ends up pointing to new name (64-byte kmalloc'ed array), the latter - 255 (length of the old name). Nothing to force the ordering there, and normally that would be OK, since we'd run into the terminating NUL and stop. Except that it's alpha, and we'd need a data dependency barrier to guarantee that we see that store of NUL __d_alloc() has done. In a similar situation dentry_cmp() would survive; it does explicit smp_read_barrier_depends() after fetching ->d_name.name. prepend_name() doesn't and it risks walking past the end of kmalloc'ed object and possibly oops due to taking a page fault in kernel mode. Cc: stable@vger.kernel.org # 3.12+ Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index cb25a1a..e7484f9 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2810,6 +2810,9 @@ static int prepend(char **buffer, int *buflen, const char *str, int namelen) * the beginning of the name. The sequence number check at the caller will * retry it again when a d_move() does happen. So any garbage in the buffer * due to mismatched pointer and length will be discarded. + * + * Data dependency barrier is needed to make sure that we see that terminating + * NUL. Alpha strikes again, film at 11... */ static int prepend_name(char **buffer, int *buflen, struct qstr *name) { @@ -2817,6 +2820,8 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) u32 dlen = ACCESS_ONCE(name->len); char *p; + smp_read_barrier_depends(); + *buflen -= dlen + 1; if (*buflen < 0) return -ENAMETOOLONG; -- cgit v0.10.2 From 8d85b4845a668d9a72649005c5aa932657311bd4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 29 Sep 2014 14:54:27 -0400 Subject: Allow sharing external names after __d_move() * external dentry names get a small structure prepended to them (struct external_name). * it contains an atomic refcount, matching the number of struct dentry instances that have ->d_name.name pointing to that external name. The first thing free_dentry() does is decrementing refcount of external name, so the instances that are between the call of free_dentry() and RCU-delayed actual freeing do not contribute. * __d_move(x, y, false) makes the name of x equal to the name of y, external or not. If y has an external name, extra reference is grabbed and put into x->d_name.name. If x used to have an external name, the reference to the old name is dropped and, should it reach zero, freeing is scheduled via kfree_rcu(). * free_dentry() in dentry with external name decrements the refcount of that name and, should it reach zero, does RCU-delayed call that will free both the dentry and external name. Otherwise it does what it used to do, except that __d_free() doesn't even look at ->d_name.name; it simply frees the dentry. All non-RCU accesses to dentry external name are safe wrt freeing since they all should happen before free_dentry() is called. RCU accesses might run into a dentry seen by free_dentry() or into an old name that got already dropped by __d_move(); however, in both cases dentry must have been alive and refer to that name at some point after we'd done rcu_read_lock(), which means that any freeing must be still pending. Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index e7484f9..4858d2e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -235,18 +235,44 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c return dentry_string_cmp(cs, ct, tcount); } +struct external_name { + union { + atomic_t count; + struct rcu_head head; + } u; + unsigned char name[]; +}; + +static inline struct external_name *external_name(struct dentry *dentry) +{ + return container_of(dentry->d_name.name, struct external_name, name[0]); +} + static void __d_free(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); WARN_ON(!hlist_unhashed(&dentry->d_alias)); - if (dname_external(dentry)) - kfree(dentry->d_name.name); + kmem_cache_free(dentry_cache, dentry); +} + +static void __d_free_external(struct rcu_head *head) +{ + struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); + WARN_ON(!hlist_unhashed(&dentry->d_alias)); + kfree(external_name(dentry)); kmem_cache_free(dentry_cache, dentry); } static void dentry_free(struct dentry *dentry) { + if (unlikely(dname_external(dentry))) { + struct external_name *p = external_name(dentry); + if (likely(atomic_dec_and_test(&p->u.count))) { + call_rcu(&dentry->d_u.d_rcu, __d_free_external); + return; + } + } /* if dentry was never visible to RCU, immediate free is OK */ if (!(dentry->d_flags & DCACHE_RCUACCESS)) __d_free(&dentry->d_u.d_rcu); @@ -1438,11 +1464,14 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) */ dentry->d_iname[DNAME_INLINE_LEN-1] = 0; if (name->len > DNAME_INLINE_LEN-1) { - dname = kmalloc(name->len + 1, GFP_KERNEL); - if (!dname) { + size_t size = offsetof(struct external_name, name[1]); + struct external_name *p = kmalloc(size + name->len, GFP_KERNEL); + if (!p) { kmem_cache_free(dentry_cache, dentry); return NULL; } + atomic_set(&p->u.count, 1); + dname = p->name; } else { dname = dentry->d_iname; } @@ -2372,11 +2401,10 @@ void dentry_update_name_case(struct dentry *dentry, struct qstr *name) } EXPORT_SYMBOL(dentry_update_name_case); -static void switch_names(struct dentry *dentry, struct dentry *target, - bool exchange) +static void swap_names(struct dentry *dentry, struct dentry *target) { - if (dname_external(target)) { - if (dname_external(dentry)) { + if (unlikely(dname_external(target))) { + if (unlikely(dname_external(dentry))) { /* * Both external: swap the pointers */ @@ -2392,7 +2420,7 @@ static void switch_names(struct dentry *dentry, struct dentry *target, target->d_name.name = target->d_iname; } } else { - if (dname_external(dentry)) { + if (unlikely(dname_external(dentry))) { /* * dentry:external, target:internal. Give dentry's * storage to target and make dentry internal @@ -2407,12 +2435,6 @@ static void switch_names(struct dentry *dentry, struct dentry *target, */ unsigned int i; BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); - if (!exchange) { - memcpy(dentry->d_iname, target->d_name.name, - target->d_name.len + 1); - dentry->d_name.hash_len = target->d_name.hash_len; - return; - } for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { swap(((long *) &dentry->d_iname)[i], ((long *) &target->d_iname)[i]); @@ -2422,6 +2444,24 @@ static void switch_names(struct dentry *dentry, struct dentry *target, swap(dentry->d_name.hash_len, target->d_name.hash_len); } +static void copy_name(struct dentry *dentry, struct dentry *target) +{ + struct external_name *old_name = NULL; + if (unlikely(dname_external(dentry))) + old_name = external_name(dentry); + if (unlikely(dname_external(target))) { + atomic_inc(&external_name(target)->u.count); + dentry->d_name = target->d_name; + } else { + memcpy(dentry->d_iname, target->d_name.name, + target->d_name.len + 1); + dentry->d_name.name = dentry->d_iname; + dentry->d_name.hash_len = target->d_name.hash_len; + } + if (old_name && likely(atomic_dec_and_test(&old_name->u.count))) + kfree_rcu(old_name, u.head); +} + static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) { /* @@ -2518,7 +2558,10 @@ static void __d_move(struct dentry *dentry, struct dentry *target, } /* Switch the names.. */ - switch_names(dentry, target, exchange); + if (exchange) + swap_names(dentry, target); + else + copy_name(dentry, target); /* ... and switch them in the tree */ if (IS_ROOT(dentry)) { -- cgit v0.10.2 From b3ca406f2755c20cea1cc1169672c56dd03c266c Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Wed, 10 Sep 2014 09:56:22 +0800 Subject: autofs - remove obsolete d_invalidate() from expire Biederman's umount-on-rmdir series changes d_invalidate() to sumarily remove mounts under the passed in dentry regardless of whether they are busy or not. So calling this in fs/autofs4/expire.c:autofs4_tree_busy() is definitely the wrong thing to do becuase it will silently umount entries instead of just cleaning stale dentrys. But this call shouldn't be needed and testing shows that automounting continues to function without it. As Al Viro correctly surmises the original intent of the call was to perform what shrink_dcache_parent() does. If at some time in the future I see stale dentries accumulating following failed mounts I'll revisit the issue and possibly add a shrink_dcache_parent() call if needed. Signed-off-by: Ian Kent Cc: Al Viro Cc: Eric W. Biederman Signed-off-by: Al Viro diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index a7be57e..8fa3895 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -255,12 +255,6 @@ static int autofs4_tree_busy(struct vfsmount *mnt, struct autofs_info *ino = autofs4_dentry_ino(p); unsigned int ino_count = atomic_read(&ino->count); - /* - * Clean stale dentries below that have not been - * invalidated after a mount fail during lookup - */ - d_invalidate(p); - /* allow for dget above and top is already dgot */ if (p == top) ino_count += 2; -- cgit v0.10.2 From 9ea459e110df32e60a762f311f7939eaa879601d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 Aug 2014 13:08:20 -0400 Subject: delayed mntput On final mntput() we want fs shutdown to happen before return to userland; however, the only case where we want it happen right there (i.e. where task_work_add won't do) is MNT_INTERNAL victim. Those have to be fully synchronous - failure halfway through module init might count on having vfsmount killed right there. Fortunately, final mntput on MNT_INTERNAL vfsmounts happens on shallow stack. So we handle those synchronously and do an analog of delayed fput logics for everything else. As the result, we are guaranteed that fs shutdown will always happen on shallow stack. Signed-off-by: Al Viro diff --git a/fs/mount.h b/fs/mount.h index 6740a62..8f2a14a 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -29,7 +29,10 @@ struct mount { struct mount *mnt_parent; struct dentry *mnt_mountpoint; struct vfsmount mnt; - struct rcu_head mnt_rcu; + union { + struct rcu_head mnt_rcu; + struct llist_node mnt_llist; + }; #ifdef CONFIG_SMP struct mnt_pcp __percpu *mnt_pcp; #else diff --git a/fs/namespace.c b/fs/namespace.c index ef42d9b..0441343 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "pnode.h" #include "internal.h" @@ -957,6 +958,46 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, return ERR_PTR(err); } +static void cleanup_mnt(struct mount *mnt) +{ + /* + * This probably indicates that somebody messed + * up a mnt_want/drop_write() pair. If this + * happens, the filesystem was probably unable + * to make r/w->r/o transitions. + */ + /* + * The locking used to deal with mnt_count decrement provides barriers, + * so mnt_get_writers() below is safe. + */ + WARN_ON(mnt_get_writers(mnt)); + if (unlikely(mnt->mnt_pins.first)) + mnt_pin_kill(mnt); + fsnotify_vfsmount_delete(&mnt->mnt); + dput(mnt->mnt.mnt_root); + deactivate_super(mnt->mnt.mnt_sb); + mnt_free_id(mnt); + call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt); +} + +static void __cleanup_mnt(struct rcu_head *head) +{ + cleanup_mnt(container_of(head, struct mount, mnt_rcu)); +} + +static LLIST_HEAD(delayed_mntput_list); +static void delayed_mntput(struct work_struct *unused) +{ + struct llist_node *node = llist_del_all(&delayed_mntput_list); + struct llist_node *next; + + for (; node; node = next) { + next = llist_next(node); + cleanup_mnt(llist_entry(node, struct mount, mnt_llist)); + } +} +static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput); + static void mntput_no_expire(struct mount *mnt) { rcu_read_lock(); @@ -982,24 +1023,18 @@ static void mntput_no_expire(struct mount *mnt) list_del(&mnt->mnt_instance); unlock_mount_hash(); - /* - * This probably indicates that somebody messed - * up a mnt_want/drop_write() pair. If this - * happens, the filesystem was probably unable - * to make r/w->r/o transitions. - */ - /* - * The locking used to deal with mnt_count decrement provides barriers, - * so mnt_get_writers() below is safe. - */ - WARN_ON(mnt_get_writers(mnt)); - if (unlikely(mnt->mnt_pins.first)) - mnt_pin_kill(mnt); - fsnotify_vfsmount_delete(&mnt->mnt); - dput(mnt->mnt.mnt_root); - deactivate_super(mnt->mnt.mnt_sb); - mnt_free_id(mnt); - call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt); + if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) { + struct task_struct *task = current; + if (likely(!(task->flags & PF_KTHREAD))) { + init_task_work(&mnt->mnt_rcu, __cleanup_mnt); + if (!task_work_add(task, &mnt->mnt_rcu, true)) + return; + } + if (llist_add(&mnt->mnt_llist, &delayed_mntput_list)) + schedule_delayed_work(&delayed_mntput_work, 1); + return; + } + cleanup_mnt(mnt); } void mntput(struct vfsmount *mnt) -- cgit v0.10.2 From 3ccb354d641d910309b916b9c856e2a82ced7237 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 12 Feb 2014 16:08:06 -0800 Subject: vfs: Document the effect of d_revalidate on d_find_alias d_drop or check_submounts_and_drop called from d_revalidate can result in renamed directories with child dentries being unhashed. These renamed and drop directory dentries can be rehashed after d_materialise_unique uses d_find_alias to find them. Reviewed-by: Miklos Szeredi Signed-off-by: "Eric W. Biederman" Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index 4858d2e..1f8e6ac 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -761,7 +761,8 @@ EXPORT_SYMBOL(dget_parent); * acquire the reference to alias and return it. Otherwise return NULL. * Notice that if inode is a directory there can be only one alias and * it can be unhashed only if it has no children, or if it is the root - * of a filesystem. + * of a filesystem, or if the directory was renamed and d_revalidate + * was the first vfs operation to notice. * * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer * any other hashed alias over that one. -- cgit v0.10.2 From bafc9b754f752ea798c39f9b099a228fd56604e0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 13 Feb 2014 07:54:28 -0800 Subject: vfs: More precise tests in d_invalidate The current comments in d_invalidate about what and why it is doing what it is doing are wildly off-base. Which is not surprising as the comments date back to last minute bug fix of the 2.2 kernel. The big fat lie of a comment said: If it's a directory, we can't drop it for fear of somebody re-populating it with children (even though dropping it would make it unreachable from that root, we still might repopulate it if it was a working directory or similar). [AV] What we really need to avoid is multiple dentry aliases of the same directory inode; on all filesystems that have ->d_revalidate() we either declare all positive dentries always valid (and thus never fed to d_invalidate()) or use d_materialise_unique() and/or d_splice_alias(), which take care of alias prevention. The current rules are: - To prevent mount point leaks dentries that are mount points or that have childrent that are mount points may not be be unhashed. - All dentries may be unhashed. - Directories may be rehashed with d_materialise_unique check_submounts_and_drop implements this already for well maintained remote filesystems so implement the current rules in d_invalidate by just calling check_submounts_and_drop. The one difference between d_invalidate and check_submounts_and_drop is that d_invalidate must respect it when a d_revalidate method has earlier called d_drop so preserve the d_unhashed check in d_invalidate. Reviewed-by: Miklos Szeredi Signed-off-by: "Eric W. Biederman" Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index 1f8e6ac..8150e4e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -650,9 +650,8 @@ EXPORT_SYMBOL(dput); * @dentry: dentry to invalidate * * Try to invalidate the dentry if it turns out to be - * possible. If there are other dentries that can be - * reached through this one we can't delete it and we - * return -EBUSY. On success we return 0. + * possible. If there are reasons not to delete it + * return -EBUSY. On success return 0. * * no dcache lock. */ @@ -667,38 +666,9 @@ int d_invalidate(struct dentry * dentry) spin_unlock(&dentry->d_lock); return 0; } - /* - * Check whether to do a partial shrink_dcache - * to get rid of unused child entries. - */ - if (!list_empty(&dentry->d_subdirs)) { - spin_unlock(&dentry->d_lock); - shrink_dcache_parent(dentry); - spin_lock(&dentry->d_lock); - } - - /* - * Somebody else still using it? - * - * If it's a directory, we can't drop it - * for fear of somebody re-populating it - * with children (even though dropping it - * would make it unreachable from the root, - * we might still populate it if it was a - * working directory or similar). - * We also need to leave mountpoints alone, - * directory or not. - */ - if (dentry->d_lockref.count > 1 && dentry->d_inode) { - if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) { - spin_unlock(&dentry->d_lock); - return -EBUSY; - } - } - - __d_drop(dentry); spin_unlock(&dentry->d_lock); - return 0; + + return check_submounts_and_drop(dentry); } EXPORT_SYMBOL(d_invalidate); -- cgit v0.10.2 From 7af1364ffa64db61e386628594836e13d2ef04b5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 4 Oct 2013 19:15:13 -0700 Subject: vfs: Don't allow overwriting mounts in the current mount namespace In preparation for allowing mountpoints to be renamed and unlinked in remote filesystems and in other mount namespaces test if on a dentry there is a mount in the local mount namespace before allowing it to be renamed or unlinked. The primary motivation here are old versions of fusermount unmount which is not safe if the a path can be renamed or unlinked while it is verifying the mount is safe to unmount. More recent versions are simpler and safer by simply using UMOUNT_NOFOLLOW when unmounting a mount in a directory owned by an arbitrary user. Miklos Szeredi reports this is approach is good enough to remove concerns about new kernels mixed with old versions of fusermount. A secondary motivation for restrictions here is that it removing empty directories that have non-empty mount points on them appears to violate the rule that rmdir can not remove empty directories. As Linus Torvalds pointed out this is useful for programs (like git) that test if a directory is empty with rmdir. Therefore this patch arranges to enforce the existing mount point semantics for local mount namespace. v2: Rewrote the test to be a drop in replacement for d_mountpoint v3: Use bool instead of int as the return type of is_local_mountpoint Reviewed-by: Miklos Szeredi Signed-off-by: "Eric W. Biederman" Signed-off-by: Al Viro diff --git a/fs/mount.h b/fs/mount.h index 8f2a14a..8c6a2a6 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -115,3 +115,12 @@ struct proc_mounts { #define proc_mounts(p) (container_of((p), struct proc_mounts, m)) extern const struct seq_operations mounts_op; + +extern bool __is_local_mountpoint(struct dentry *dentry); +static inline bool is_local_mountpoint(struct dentry *dentry) +{ + if (!d_mountpoint(dentry)) + return false; + + return __is_local_mountpoint(dentry); +} diff --git a/fs/namei.c b/fs/namei.c index a7b05bf..a3a14b0 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3565,6 +3565,8 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) mutex_lock(&dentry->d_inode->i_mutex); error = -EBUSY; + if (is_local_mountpoint(dentry)) + goto out; if (d_mountpoint(dentry)) goto out; @@ -3681,7 +3683,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate return -EPERM; mutex_lock(&target->i_mutex); - if (d_mountpoint(dentry)) + if (is_local_mountpoint(dentry) || d_mountpoint(dentry)) error = -EBUSY; else { error = security_inode_unlink(dir, dentry); @@ -4126,6 +4128,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, mutex_lock(&target->i_mutex); error = -EBUSY; + if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry)) + goto out; if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) goto out; diff --git a/fs/namespace.c b/fs/namespace.c index 0441343..77ffdb8 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -667,6 +667,41 @@ struct vfsmount *lookup_mnt(struct path *path) return m; } +/* + * __is_local_mountpoint - Test to see if dentry is a mountpoint in the + * current mount namespace. + * + * The common case is dentries are not mountpoints at all and that + * test is handled inline. For the slow case when we are actually + * dealing with a mountpoint of some kind, walk through all of the + * mounts in the current mount namespace and test to see if the dentry + * is a mountpoint. + * + * The mount_hashtable is not usable in the context because we + * need to identify all mounts that may be in the current mount + * namespace not just a mount that happens to have some specified + * parent mount. + */ +bool __is_local_mountpoint(struct dentry *dentry) +{ + struct mnt_namespace *ns = current->nsproxy->mnt_ns; + struct mount *mnt; + bool is_covered = false; + + if (!d_mountpoint(dentry)) + goto out; + + down_read(&namespace_sem); + list_for_each_entry(mnt, &ns->list, mnt_list) { + is_covered = (mnt->mnt_mountpoint == dentry); + if (is_covered) + break; + } + up_read(&namespace_sem); +out: + return is_covered; +} + static struct mountpoint *new_mountpoint(struct dentry *dentry) { struct hlist_head *chain = mp_hash(dentry); -- cgit v0.10.2 From 0a5eb7c8189922e86a840972cd0b57e41de6f031 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 22 Sep 2013 19:37:01 -0700 Subject: vfs: Keep a list of mounts on a mount point To spot any possible problems call BUG if a mountpoint is put when it's list of mounts is not empty. AV: use hlist instead of list_head Reviewed-by: Miklos Szeredi Signed-off-by: Eric W. Biederman Signed-off-by: Al Viro diff --git a/fs/mount.h b/fs/mount.h index 8c6a2a6..68bb03e 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -21,6 +21,7 @@ struct mnt_pcp { struct mountpoint { struct hlist_node m_hash; struct dentry *m_dentry; + struct hlist_head m_list; int m_count; }; @@ -51,6 +52,7 @@ struct mount { struct mount *mnt_master; /* slave is on master->mnt_slave_list */ struct mnt_namespace *mnt_ns; /* containing namespace */ struct mountpoint *mnt_mp; /* where is it mounted */ + struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */ #ifdef CONFIG_FSNOTIFY struct hlist_head mnt_fsnotify_marks; __u32 mnt_fsnotify_mask; diff --git a/fs/namespace.c b/fs/namespace.c index 77ffdb8..99572dd 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -225,6 +225,7 @@ static struct mount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_share); INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); + INIT_HLIST_NODE(&mnt->mnt_mp_list); #ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); #endif @@ -731,6 +732,7 @@ static struct mountpoint *new_mountpoint(struct dentry *dentry) mp->m_dentry = dentry; mp->m_count = 1; hlist_add_head(&mp->m_hash, chain); + INIT_HLIST_HEAD(&mp->m_list); return mp; } @@ -738,6 +740,7 @@ static void put_mountpoint(struct mountpoint *mp) { if (!--mp->m_count) { struct dentry *dentry = mp->m_dentry; + BUG_ON(!hlist_empty(&mp->m_list)); spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_MOUNTED; spin_unlock(&dentry->d_lock); @@ -784,6 +787,7 @@ static void detach_mnt(struct mount *mnt, struct path *old_path) mnt->mnt_mountpoint = mnt->mnt.mnt_root; list_del_init(&mnt->mnt_child); hlist_del_init_rcu(&mnt->mnt_hash); + hlist_del_init(&mnt->mnt_mp_list); put_mountpoint(mnt->mnt_mp); mnt->mnt_mp = NULL; } @@ -800,6 +804,7 @@ void mnt_set_mountpoint(struct mount *mnt, child_mnt->mnt_mountpoint = dget(mp->m_dentry); child_mnt->mnt_parent = mnt; child_mnt->mnt_mp = mp; + hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list); } /* @@ -1342,6 +1347,7 @@ void umount_tree(struct mount *mnt, int how) if (how < 2) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; if (mnt_has_parent(p)) { + hlist_del_init(&p->mnt_mp_list); put_mountpoint(p->mnt_mp); mnt_add_count(p->mnt_parent, -1); /* move the reference to mountpoint into ->mnt_ex_mountpoint */ -- cgit v0.10.2 From e2dfa935464272395b4f35f4cc74ffcc87418b84 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 24 Feb 2014 17:32:34 -0800 Subject: vfs: factor out lookup_mountpoint from new_mountpoint I am shortly going to add a new user of struct mountpoint that needs to look up existing entries but does not want to create a struct mountpoint if one does not exist. Therefore to keep the code simple and easy to read split out lookup_mountpoint from new_mountpoint. Signed-off-by: "Eric W. Biederman" Signed-off-by: Al Viro diff --git a/fs/namespace.c b/fs/namespace.c index 99572dd..88fc3f4 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -703,11 +703,10 @@ out: return is_covered; } -static struct mountpoint *new_mountpoint(struct dentry *dentry) +static struct mountpoint *lookup_mountpoint(struct dentry *dentry) { struct hlist_head *chain = mp_hash(dentry); struct mountpoint *mp; - int ret; hlist_for_each_entry(mp, chain, m_hash) { if (mp->m_dentry == dentry) { @@ -718,6 +717,14 @@ static struct mountpoint *new_mountpoint(struct dentry *dentry) return mp; } } + return NULL; +} + +static struct mountpoint *new_mountpoint(struct dentry *dentry) +{ + struct hlist_head *chain = mp_hash(dentry); + struct mountpoint *mp; + int ret; mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); if (!mp) @@ -1818,7 +1825,9 @@ retry: namespace_lock(); mnt = lookup_mnt(path); if (likely(!mnt)) { - struct mountpoint *mp = new_mountpoint(dentry); + struct mountpoint *mp = lookup_mountpoint(dentry); + if (!mp) + mp = new_mountpoint(dentry); if (IS_ERR(mp)) { namespace_unlock(); mutex_unlock(&dentry->d_inode->i_mutex); -- cgit v0.10.2 From 80b5dce8c59b0de1ed6e403b8298e02dcb4db64b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 3 Oct 2013 01:31:18 -0700 Subject: vfs: Add a function to lazily unmount all mounts from any dentry. The new function detach_mounts comes in two pieces. The first piece is a static inline test of d_mounpoint that returns immediately without taking any locks if d_mounpoint is not set. In the common case when mountpoints are absent this allows the vfs to continue running with it's same cacheline foot print. The second piece of detach_mounts __detach_mounts actually does the work and it assumes that a mountpoint is present so it is slow and takes namespace_sem for write, and then locks the mount hash (aka mount_lock) after a struct mountpoint has been found. With those two locks held each entry on the list of mounts on a mountpoint is selected and lazily unmounted until all of the mount have been lazily unmounted. v7: Wrote a proper change description and removed the changelog documenting deleted wrong turns. Signed-off-by: Eric W. Biederman Signed-off-by: Al Viro diff --git a/fs/mount.h b/fs/mount.h index 68bb03e..f82c628 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -87,6 +87,15 @@ extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *); extern bool legitimize_mnt(struct vfsmount *, unsigned); +extern void __detach_mounts(struct dentry *dentry); + +static inline void detach_mounts(struct dentry *dentry) +{ + if (!d_mountpoint(dentry)) + return; + __detach_mounts(dentry); +} + static inline void get_mnt_ns(struct mnt_namespace *ns) { atomic_inc(&ns->count); diff --git a/fs/namespace.c b/fs/namespace.c index 88fc3f4..00e5b1e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1468,6 +1468,37 @@ static int do_umount(struct mount *mnt, int flags) return retval; } +/* + * __detach_mounts - lazily unmount all mounts on the specified dentry + * + * During unlink, rmdir, and d_drop it is possible to loose the path + * to an existing mountpoint, and wind up leaking the mount. + * detach_mounts allows lazily unmounting those mounts instead of + * leaking them. + * + * The caller may hold dentry->d_inode->i_mutex. + */ +void __detach_mounts(struct dentry *dentry) +{ + struct mountpoint *mp; + struct mount *mnt; + + namespace_lock(); + mp = lookup_mountpoint(dentry); + if (!mp) + goto out_unlock; + + lock_mount_hash(); + while (!hlist_empty(&mp->m_list)) { + mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); + umount_tree(mnt, 2); + } + unlock_mount_hash(); + put_mountpoint(mp); +out_unlock: + namespace_unlock(); +} + /* * Is the caller allowed to modify his namespace? */ -- cgit v0.10.2 From 8ed936b5671bfb33d89bc60bdcc7cf0470ba52fe Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 1 Oct 2013 18:33:48 -0700 Subject: vfs: Lazily remove mounts on unlinked files and directories. With the introduction of mount namespaces and bind mounts it became possible to access files and directories that on some paths are mount points but are not mount points on other paths. It is very confusing when rm -rf somedir returns -EBUSY simply because somedir is mounted somewhere else. With the addition of user namespaces allowing unprivileged mounts this condition has gone from annoying to allowing a DOS attack on other users in the system. The possibility for mischief is removed by updating the vfs to support rename, unlink and rmdir on a dentry that is a mountpoint and by lazily unmounting mountpoints on deleted dentries. In particular this change allows rename, unlink and rmdir system calls on a dentry without a mountpoint in the current mount namespace to succeed, and it allows rename, unlink, and rmdir performed on a distributed filesystem to update the vfs cache even if when there is a mount in some namespace on the original dentry. There are two common patterns of maintaining mounts: Mounts on trusted paths with the parent directory of the mount point and all ancestory directories up to / owned by root and modifiable only by root (i.e. /media/xxx, /dev, /dev/pts, /proc, /sys, /sys/fs/cgroup/{cpu, cpuacct, ...}, /usr, /usr/local). Mounts on unprivileged directories maintained by fusermount. In the case of mounts in trusted directories owned by root and modifiable only by root the current parent directory permissions are sufficient to ensure a mount point on a trusted path is not removed or renamed by anyone other than root, even if there is a context where the there are no mount points to prevent this. In the case of mounts in directories owned by less privileged users races with users modifying the path of a mount point are already a danger. fusermount already uses a combination of chdir, /proc//fd/NNN, and UMOUNT_NOFOLLOW to prevent these races. The removable of global rename, unlink, and rmdir protection really adds nothing new to consider only a widening of the attack window, and fusermount is already safe against unprivileged users modifying the directory simultaneously. In principle for perfect userspace programs returning -EBUSY for unlink, rmdir, and rename of dentires that have mounts in the local namespace is actually unnecessary. Unfortunately not all userspace programs are perfect so retaining -EBUSY for unlink, rmdir and rename of dentries that have mounts in the current mount namespace plays an important role of maintaining consistency with historical behavior and making imperfect userspace applications hard to exploit. v2: Remove spurious old_dentry. v3: Optimized shrink_submounts_and_drop Removed unsued afs label v4: Simplified the changes to check_submounts_and_drop Do not rename check_submounts_and_drop shrink_submounts_and_drop Document what why we need atomicity in check_submounts_and_drop Rely on the parent inode mutex to make d_revalidate and d_invalidate an atomic unit. v5: Refcount the mountpoint to detach in case of simultaneous renames. Reviewed-by: Miklos Szeredi Signed-off-by: "Eric W. Biederman" Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index 8150e4e..484114a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1343,36 +1343,39 @@ void shrink_dcache_for_umount(struct super_block *sb) } } -static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry) +struct detach_data { + struct select_data select; + struct dentry *mountpoint; +}; +static enum d_walk_ret detach_and_collect(void *_data, struct dentry *dentry) { - struct select_data *data = _data; + struct detach_data *data = _data; if (d_mountpoint(dentry)) { - data->found = -EBUSY; + __dget_dlock(dentry); + data->mountpoint = dentry; return D_WALK_QUIT; } - return select_collect(_data, dentry); + return select_collect(&data->select, dentry); } static void check_and_drop(void *_data) { - struct select_data *data = _data; + struct detach_data *data = _data; - if (d_mountpoint(data->start)) - data->found = -EBUSY; - if (!data->found) - __d_drop(data->start); + if (!data->mountpoint && !data->select.found) + __d_drop(data->select.start); } /** - * check_submounts_and_drop - prune dcache, check for submounts and drop + * check_submounts_and_drop - detach submounts, prune dcache, and drop * - * All done as a single atomic operation relative to has_unlinked_ancestor(). - * Returns 0 if successfully unhashed @parent. If there were submounts then - * return -EBUSY. + * The final d_drop is done as an atomic operation relative to + * rename_lock ensuring there are no races with d_set_mounted. This + * ensures there are no unhashed dentries on the path to a mountpoint. * - * @dentry: dentry to prune and drop + * @dentry: dentry to detach, prune and drop */ int check_submounts_and_drop(struct dentry *dentry) { @@ -1385,19 +1388,24 @@ int check_submounts_and_drop(struct dentry *dentry) } for (;;) { - struct select_data data; + struct detach_data data; - INIT_LIST_HEAD(&data.dispose); - data.start = dentry; - data.found = 0; + data.mountpoint = NULL; + INIT_LIST_HEAD(&data.select.dispose); + data.select.start = dentry; + data.select.found = 0; + + d_walk(dentry, &data, detach_and_collect, check_and_drop); - d_walk(dentry, &data, check_and_collect, check_and_drop); - ret = data.found; + if (data.select.found) + shrink_dentry_list(&data.select.dispose); - if (!list_empty(&data.dispose)) - shrink_dentry_list(&data.dispose); + if (data.mountpoint) { + detach_mounts(data.mountpoint); + dput(data.mountpoint); + } - if (ret <= 0) + if (!data.mountpoint && !data.select.found) break; cond_resched(); @@ -2639,10 +2647,8 @@ static struct dentry *__d_unalias(struct inode *inode, goto out_err; m2 = &alias->d_parent->d_inode->i_mutex; out_unalias: - if (likely(!d_mountpoint(alias))) { - __d_move(alias, dentry, false); - ret = alias; - } + __d_move(alias, dentry, false); + ret = alias; out_err: spin_unlock(&inode->i_lock); if (m2) diff --git a/fs/namei.c b/fs/namei.c index a3a14b0..2ba1090 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3567,8 +3567,6 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) error = -EBUSY; if (is_local_mountpoint(dentry)) goto out; - if (d_mountpoint(dentry)) - goto out; error = security_inode_rmdir(dir, dentry); if (error) @@ -3581,6 +3579,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_flags |= S_DEAD; dont_mount(dentry); + detach_mounts(dentry); out: mutex_unlock(&dentry->d_inode->i_mutex); @@ -3683,7 +3682,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate return -EPERM; mutex_lock(&target->i_mutex); - if (is_local_mountpoint(dentry) || d_mountpoint(dentry)) + if (is_local_mountpoint(dentry)) error = -EBUSY; else { error = security_inode_unlink(dir, dentry); @@ -3692,8 +3691,10 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate if (error) goto out; error = dir->i_op->unlink(dir, dentry); - if (!error) + if (!error) { dont_mount(dentry); + detach_mounts(dentry); + } } } out: @@ -4130,8 +4131,6 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, error = -EBUSY; if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry)) goto out; - if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) - goto out; if (max_links && new_dir != old_dir) { error = -EMLINK; @@ -4168,6 +4167,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (is_dir) target->i_flags |= S_DEAD; dont_mount(new_dentry); + detach_mounts(new_dentry); } if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) { if (!(flags & RENAME_EXCHANGE)) -- cgit v0.10.2 From 9b053f3207e8887fed88162a339fdd4001abcdb2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 13 Feb 2014 09:34:30 -0800 Subject: vfs: Remove unnecessary calls of check_submounts_and_drop Now that check_submounts_and_drop can not fail and is called from d_invalidate there is no longer a need to call check_submounts_and_drom from filesystem d_revalidate methods so remove it. Reviewed-by: Miklos Szeredi Signed-off-by: "Eric W. Biederman" Signed-off-by: Al Viro diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 5293003..a1645b8 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -669,7 +669,6 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) out_valid: dentry->d_fsdata = dir_version; -out_skip: dput(parent); key_put(key); _leave(" = 1 [valid]"); @@ -682,10 +681,6 @@ not_found: spin_unlock(&dentry->d_lock); out_bad: - /* don't unhash if we have submounts */ - if (check_submounts_and_drop(dentry) != 0) - goto out_skip; - _debug("dropping dentry %s/%s", parent->d_name.name, dentry->d_name.name); dput(parent); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index de1d84a..820efd7 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -274,9 +274,6 @@ out: invalid: ret = 0; - - if (!(flags & LOOKUP_RCU) && check_submounts_and_drop(entry) != 0) - ret = 1; goto out; } diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index d3a5d4e..589f4ea 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -93,9 +93,6 @@ invalid_gunlock: if (!had_lock) gfs2_glock_dq_uninit(&d_gh); invalid: - if (check_submounts_and_drop(dentry) != 0) - goto valid; - dput(parent); return 0; diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index a693f5b..1c77193 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -463,21 +463,10 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) goto out_bad; mutex_unlock(&kernfs_mutex); -out_valid: return 1; out_bad: mutex_unlock(&kernfs_mutex); out_bad_unlocked: - /* - * @dentry doesn't match the underlying kernfs node, drop the - * dentry and force lookup. If we have submounts we must allow the - * vfs caches to lie about the state of the filesystem to prevent - * leaks and other nasty things, so use check_submounts_and_drop() - * instead of d_drop(). - */ - if (check_submounts_and_drop(dentry) != 0) - goto out_valid; - return 0; } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 36d921f..8be6988 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1211,10 +1211,6 @@ out_zap_parent: if (IS_ROOT(dentry)) goto out_valid; } - /* If we have submounts, don't unhash ! */ - if (check_submounts_and_drop(dentry) != 0) - goto out_valid; - dput(parent); dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n", __func__, dentry); -- cgit v0.10.2 From 1ffe46d11cc88479797b262f60d92e5fb461b411 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 13 Feb 2014 09:39:37 -0800 Subject: vfs: Merge check_submounts_and_drop and d_invalidate Now that d_invalidate is the only caller of check_submounts_and_drop, expand check_submounts_and_drop inline in d_invalidate. Reviewed-by: Miklos Szeredi Signed-off-by: "Eric W. Biederman" Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index 484114a..5e02b9e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -645,32 +645,6 @@ kill_it: } EXPORT_SYMBOL(dput); -/** - * d_invalidate - invalidate a dentry - * @dentry: dentry to invalidate - * - * Try to invalidate the dentry if it turns out to be - * possible. If there are reasons not to delete it - * return -EBUSY. On success return 0. - * - * no dcache lock. - */ - -int d_invalidate(struct dentry * dentry) -{ - /* - * If it's already been dropped, return OK. - */ - spin_lock(&dentry->d_lock); - if (d_unhashed(dentry)) { - spin_unlock(&dentry->d_lock); - return 0; - } - spin_unlock(&dentry->d_lock); - - return check_submounts_and_drop(dentry); -} -EXPORT_SYMBOL(d_invalidate); /* This must be called with d_lock held */ static inline void __dget_dlock(struct dentry *dentry) @@ -1190,7 +1164,7 @@ EXPORT_SYMBOL(have_submounts); * reachable (e.g. NFS can unhash a directory dentry and then the complete * subtree can become unreachable). * - * Only one of check_submounts_and_drop() and d_set_mounted() must succeed. For + * Only one of d_invalidate() and d_set_mounted() must succeed. For * this reason take rename_lock and d_lock on dentry and ancestors. */ int d_set_mounted(struct dentry *dentry) @@ -1199,7 +1173,7 @@ int d_set_mounted(struct dentry *dentry) int ret = -ENOENT; write_seqlock(&rename_lock); for (p = dentry->d_parent; !IS_ROOT(p); p = p->d_parent) { - /* Need exclusion wrt. check_submounts_and_drop() */ + /* Need exclusion wrt. d_invalidate() */ spin_lock(&p->d_lock); if (unlikely(d_unhashed(p))) { spin_unlock(&p->d_lock); @@ -1369,18 +1343,33 @@ static void check_and_drop(void *_data) } /** - * check_submounts_and_drop - detach submounts, prune dcache, and drop + * d_invalidate - detach submounts, prune dcache, and drop + * @dentry: dentry to invalidate (aka detach, prune and drop) + * + * Try to invalidate the dentry if it turns out to be + * possible. If there are reasons not to delete it + * return -EBUSY. On success return 0. + * + * no dcache lock. * * The final d_drop is done as an atomic operation relative to * rename_lock ensuring there are no races with d_set_mounted. This * ensures there are no unhashed dentries on the path to a mountpoint. - * - * @dentry: dentry to detach, prune and drop */ -int check_submounts_and_drop(struct dentry *dentry) +int d_invalidate(struct dentry *dentry) { int ret = 0; + /* + * If it's already been dropped, return OK. + */ + spin_lock(&dentry->d_lock); + if (d_unhashed(dentry)) { + spin_unlock(&dentry->d_lock); + return 0; + } + spin_unlock(&dentry->d_lock); + /* Negative dentries can be dropped without further checks */ if (!dentry->d_inode) { d_drop(dentry); @@ -1414,7 +1403,7 @@ int check_submounts_and_drop(struct dentry *dentry) out: return ret; } -EXPORT_SYMBOL(check_submounts_and_drop); +EXPORT_SYMBOL(d_invalidate); /** * __d_alloc - allocate a dcache entry diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 75a227c..595af29 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -269,7 +269,6 @@ extern void d_prune_aliases(struct inode *); /* test whether we have any submounts in a subdir tree */ extern int have_submounts(struct dentry *); -extern int check_submounts_and_drop(struct dentry *); /* * This adds the entry to the hash queues. -- cgit v0.10.2 From 5542aa2fa7f6cddb03c4ac3135e390adffda98ca Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 13 Feb 2014 09:46:25 -0800 Subject: vfs: Make d_invalidate return void Now that d_invalidate can no longer fail, stop returning a useless return code. For the few callers that checked the return code update remove the handling of d_invalidate failure. Reviewed-by: Miklos Szeredi Signed-off-by: "Eric W. Biederman" Signed-off-by: Al Viro diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8a8e298..996eb19 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2423,9 +2423,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, goto out_dput; } - err = d_invalidate(dentry); - if (err) - goto out_unlock; + d_invalidate(dentry); down_write(&root->fs_info->subvol_sem); @@ -2510,7 +2508,6 @@ out_release: btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved); out_up_write: up_write(&root->fs_info->subvol_sem); -out_unlock: if (err) { spin_lock(&dest->root_item_lock); root_flags = btrfs_root_flags(&dest->root_item); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index b334a89..d2141f1 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -87,8 +87,6 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, return; if (dentry) { - int err; - inode = dentry->d_inode; if (inode) { /* @@ -105,10 +103,8 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, goto out; } } - err = d_invalidate(dentry); + d_invalidate(dentry); dput(dentry); - if (err) - return; } /* diff --git a/fs/dcache.c b/fs/dcache.c index 5e02b9e..70d102e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1346,34 +1346,28 @@ static void check_and_drop(void *_data) * d_invalidate - detach submounts, prune dcache, and drop * @dentry: dentry to invalidate (aka detach, prune and drop) * - * Try to invalidate the dentry if it turns out to be - * possible. If there are reasons not to delete it - * return -EBUSY. On success return 0. - * * no dcache lock. * * The final d_drop is done as an atomic operation relative to * rename_lock ensuring there are no races with d_set_mounted. This * ensures there are no unhashed dentries on the path to a mountpoint. */ -int d_invalidate(struct dentry *dentry) +void d_invalidate(struct dentry *dentry) { - int ret = 0; - /* * If it's already been dropped, return OK. */ spin_lock(&dentry->d_lock); if (d_unhashed(dentry)) { spin_unlock(&dentry->d_lock); - return 0; + return; } spin_unlock(&dentry->d_lock); /* Negative dentries can be dropped without further checks */ if (!dentry->d_inode) { d_drop(dentry); - goto out; + return; } for (;;) { @@ -1399,9 +1393,6 @@ int d_invalidate(struct dentry *dentry) cond_resched(); } - -out: - return ret; } EXPORT_SYMBOL(d_invalidate); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 820efd7..dbab798 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1286,9 +1286,7 @@ static int fuse_direntplus_link(struct file *file, d_drop(dentry); } else if (get_node_id(inode) != o->nodeid || ((o->attr.mode ^ inode->i_mode) & S_IFMT)) { - err = d_invalidate(dentry); - if (err) - goto out; + d_invalidate(dentry); } else if (is_bad_inode(inode)) { err = -EIO; goto out; diff --git a/fs/namei.c b/fs/namei.c index 2ba1090..d20d579 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1306,7 +1306,8 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, if (error < 0) { dput(dentry); return ERR_PTR(error); - } else if (!d_invalidate(dentry)) { + } else { + d_invalidate(dentry); dput(dentry); dentry = NULL; } @@ -1435,10 +1436,9 @@ unlazy: dput(dentry); return status; } - if (!d_invalidate(dentry)) { - dput(dentry); - goto need_lookup; - } + d_invalidate(dentry); + dput(dentry); + goto need_lookup; } path->mnt = mnt; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8be6988..06e8cfc 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -486,8 +486,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) nfs_setsecurity(dentry->d_inode, entry->fattr, entry->label); goto out; } else { - if (d_invalidate(dentry) != 0) - goto out; + d_invalidate(dentry); dput(dentry); } } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 595af29..81b0315 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -254,7 +254,7 @@ extern struct dentry * d_obtain_root(struct inode *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); extern void shrink_dcache_for_umount(struct super_block *); -extern int d_invalidate(struct dentry *); +extern void d_invalidate(struct dentry *); /* only used at mount-time */ extern struct dentry * d_make_root(struct inode *); -- cgit v0.10.2 From c143c2333c48f1430231b31a8c17e074b9b504eb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 13 Feb 2014 10:19:10 -0800 Subject: vfs: Remove d_drop calls from d_revalidate implementations Now that d_invalidate always succeeds it is not longer necessary or desirable to hard code d_drop calls into filesystem specific d_revalidate implementations. Remove the unnecessary d_drop calls and rely on d_invalidate to drop the dentries. Using d_invalidate ensures that paths to mount points will not be dropped. Reviewed-by: Miklos Szeredi Signed-off-by: "Eric W. Biederman" Signed-off-by: Al Viro diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index c29d6ae..b6c59ea 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1069,7 +1069,6 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) ceph_dentry_lru_touch(dentry); } else { ceph_dir_clear_complete(dir); - d_drop(dentry); } iput(dir); return valid; diff --git a/fs/proc/base.c b/fs/proc/base.c index baf852b..b4fe0ee 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1590,7 +1590,6 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags) put_task_struct(task); return 1; } - d_drop(dentry); return 0; } @@ -1727,9 +1726,6 @@ out: put_task_struct(task); out_notask: - if (status <= 0) - d_drop(dentry); - return status; } diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 955bb55..e11d7c5 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -129,8 +129,6 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) } put_task_struct(task); } - - d_drop(dentry); return 0; } -- cgit v0.10.2 From bbd5192412fdedbae00888316bfe350bf89d0458 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 13 Feb 2014 10:24:23 -0800 Subject: proc: Update proc_flush_task_mnt to use d_invalidate Now that d_invalidate always succeeds and flushes mount points use it in stead of a combination of shrink_dcache_parent and d_drop in proc_flush_task_mnt. This removes the danger of a mount point under /proc//... becoming unreachable after the d_drop. Reviewed-by: Miklos Szeredi Signed-off-by: "Eric W. Biederman" Signed-off-by: Al Viro diff --git a/fs/proc/base.c b/fs/proc/base.c index b4fe0ee..00cd85f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2639,8 +2639,7 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) /* no ->d_hash() rejects on procfs */ dentry = d_hash_and_lookup(mnt->mnt_root, &name); if (dentry) { - shrink_dcache_parent(dentry); - d_drop(dentry); + d_invalidate(dentry); dput(dentry); } @@ -2660,8 +2659,7 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) name.len = snprintf(buf, sizeof(buf), "%d", pid); dentry = d_hash_and_lookup(dir, &name); if (dentry) { - shrink_dcache_parent(dentry); - d_drop(dentry); + d_invalidate(dentry); dput(dentry); } -- cgit v0.10.2 From 29355c3904e1765948c7721719a028b7eb5dfe1d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 30 May 2014 11:25:30 -0400 Subject: d_prune_alias(): just lock the parent and call __dentry_kill() The only reason for games with ->d_prune() was __d_drop(), which was needed only to force dput() into killing the sucker off. Note that lock_parent() can be called under ->i_lock and won't drop it, so dentry is safe from somebody managing to kill it under us - it won't happen while we are holding ->i_lock. __dentry_kill() is called only with ->d_lockref.count being 0 (here and when picked from shrink list) or 1 (dput() and dropping the ancestors in shrink_dentry_list()), so it will never be called twice - the first thing it's doing is making ->d_lockref.count negative and once that happens, nothing will increment it. Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index 70d102e..d8a77b1 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -770,20 +770,13 @@ restart: hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_lockref.count) { - /* - * inform the fs via d_prune that this dentry - * is about to be unhashed and destroyed. - */ - if ((dentry->d_flags & DCACHE_OP_PRUNE) && - !d_unhashed(dentry)) - dentry->d_op->d_prune(dentry); - - __dget_dlock(dentry); - __d_drop(dentry); - spin_unlock(&dentry->d_lock); - spin_unlock(&inode->i_lock); - dput(dentry); - goto restart; + struct dentry *parent = lock_parent(dentry); + if (likely(!dentry->d_lockref.count)) { + __dentry_kill(dentry); + goto restart; + } + if (parent) + spin_unlock(&parent->d_lock); } spin_unlock(&dentry->d_lock); } -- cgit v0.10.2 From 2926620145095ffb0350b2312ac9d0af8537796f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 30 May 2014 11:39:02 -0400 Subject: dcache.c: call ->d_prune() regardless of d_unhashed() the only in-tree instance checks d_unhashed() anyway, out-of-tree code can preserve the current behaviour by adding such check if they want it and we get an ability to use it in cases where we *want* to be notified of killing being inevitable before ->d_lock is dropped, whether it's unhashed or not. In particular, autofs would benefit from that. Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index d8a77b1..21eee4c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -482,7 +482,7 @@ static void __dentry_kill(struct dentry *dentry) * inform the fs via d_prune that this dentry is about to be * unhashed and destroyed. */ - if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry)) + if (dentry->d_flags & DCACHE_OP_PRUNE) dentry->d_op->d_prune(dentry); if (dentry->d_flags & DCACHE_LRU_LIST) { -- cgit v0.10.2 From 19d860a140beac48a1377f179e693abe86a9dac9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 4 May 2014 20:11:36 -0400 Subject: handle suicide on late failure exits in execve() in search_binary_handler() ... rather than doing that in the guts of ->load_binary(). [updated to fix the bug spotted by Shentino - for SIGSEGV we really need something stronger than send_sig_info(); again, better do that in one place] Signed-off-by: Al Viro diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index d21ff89..df91466 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -308,11 +308,8 @@ static int load_aout_binary(struct linux_binprm *bprm) (current->mm->start_brk = N_BSSADDR(ex)); retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT); - if (retval < 0) { - /* Someone check-me: is this error path enough? */ - send_sig(SIGKILL, current, 0); + if (retval < 0) return retval; - } install_exec_creds(bprm); @@ -324,17 +321,13 @@ static int load_aout_binary(struct linux_binprm *bprm) error = vm_brk(text_addr & PAGE_MASK, map_size); - if (error != (text_addr & PAGE_MASK)) { - send_sig(SIGKILL, current, 0); + if (error != (text_addr & PAGE_MASK)) return error; - } error = read_code(bprm->file, text_addr, 32, ex.a_text + ex.a_data); - if ((signed long)error < 0) { - send_sig(SIGKILL, current, 0); + if ((signed long)error < 0) return error; - } } else { #ifdef WARN_OLD static unsigned long error_time, error_time2; @@ -368,20 +361,16 @@ static int load_aout_binary(struct linux_binprm *bprm) MAP_EXECUTABLE | MAP_32BIT, fd_offset); - if (error != N_TXTADDR(ex)) { - send_sig(SIGKILL, current, 0); + if (error != N_TXTADDR(ex)) return error; - } error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT, fd_offset + ex.a_text); - if (error != N_DATADDR(ex)) { - send_sig(SIGKILL, current, 0); + if (error != N_DATADDR(ex)) return error; - } } beyond_if: set_binfmt(&aout_format); diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index ca0ba15..929dec0 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -256,11 +256,8 @@ static int load_aout_binary(struct linux_binprm * bprm) (current->mm->start_brk = N_BSSADDR(ex)); retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT); - if (retval < 0) { - /* Someone check-me: is this error path enough? */ - send_sig(SIGKILL, current, 0); + if (retval < 0) return retval; - } install_exec_creds(bprm); @@ -278,17 +275,13 @@ static int load_aout_binary(struct linux_binprm * bprm) map_size = ex.a_text+ex.a_data; #endif error = vm_brk(text_addr & PAGE_MASK, map_size); - if (error != (text_addr & PAGE_MASK)) { - send_sig(SIGKILL, current, 0); + if (error != (text_addr & PAGE_MASK)) return error; - } error = read_code(bprm->file, text_addr, pos, ex.a_text+ex.a_data); - if ((signed long)error < 0) { - send_sig(SIGKILL, current, 0); + if ((signed long)error < 0) return error; - } } else { if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && (N_MAGIC(ex) != NMAGIC) && printk_ratelimit()) @@ -315,28 +308,22 @@ static int load_aout_binary(struct linux_binprm * bprm) MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, fd_offset); - if (error != N_TXTADDR(ex)) { - send_sig(SIGKILL, current, 0); + if (error != N_TXTADDR(ex)) return error; - } error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, fd_offset + ex.a_text); - if (error != N_DATADDR(ex)) { - send_sig(SIGKILL, current, 0); + if (error != N_DATADDR(ex)) return error; - } } beyond_if: set_binfmt(&aout_format); retval = set_brk(current->mm->start_brk, current->mm->brk); - if (retval < 0) { - send_sig(SIGKILL, current, 0); + if (retval < 0) return retval; - } current->mm->start_stack = (unsigned long) create_aout_tables((char __user *) bprm->p, bprm); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 3892c1a..d8fc060 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -738,10 +738,8 @@ static int load_elf_binary(struct linux_binprm *bprm) change some of these later */ retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP), executable_stack); - if (retval < 0) { - send_sig(SIGKILL, current, 0); + if (retval < 0) goto out_free_dentry; - } current->mm->start_stack = bprm->p; @@ -763,10 +761,8 @@ static int load_elf_binary(struct linux_binprm *bprm) and clear the area. */ retval = set_brk(elf_bss + load_bias, elf_brk + load_bias); - if (retval) { - send_sig(SIGKILL, current, 0); + if (retval) goto out_free_dentry; - } nbyte = ELF_PAGEOFFSET(elf_bss); if (nbyte) { nbyte = ELF_MIN_ALIGN - nbyte; @@ -820,7 +816,6 @@ static int load_elf_binary(struct linux_binprm *bprm) error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags, 0); if (BAD_ADDR(error)) { - send_sig(SIGKILL, current, 0); retval = IS_ERR((void *)error) ? PTR_ERR((void*)error) : -EINVAL; goto out_free_dentry; @@ -851,7 +846,6 @@ static int load_elf_binary(struct linux_binprm *bprm) elf_ppnt->p_memsz > TASK_SIZE || TASK_SIZE - elf_ppnt->p_memsz < k) { /* set_brk can never work. Avoid overflows. */ - send_sig(SIGKILL, current, 0); retval = -EINVAL; goto out_free_dentry; } @@ -883,12 +877,9 @@ static int load_elf_binary(struct linux_binprm *bprm) * up getting placed where the bss needs to go. */ retval = set_brk(elf_bss, elf_brk); - if (retval) { - send_sig(SIGKILL, current, 0); + if (retval) goto out_free_dentry; - } if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) { - send_sig(SIGSEGV, current, 0); retval = -EFAULT; /* Nobody gets to see this, but.. */ goto out_free_dentry; } @@ -909,7 +900,6 @@ static int load_elf_binary(struct linux_binprm *bprm) elf_entry += loc->interp_elf_ex.e_entry; } if (BAD_ADDR(elf_entry)) { - force_sig(SIGSEGV, current); retval = IS_ERR((void *)elf_entry) ? (int)elf_entry : -EINVAL; goto out_free_dentry; @@ -922,7 +912,6 @@ static int load_elf_binary(struct linux_binprm *bprm) } else { elf_entry = loc->elf_ex.e_entry; if (BAD_ADDR(elf_entry)) { - force_sig(SIGSEGV, current); retval = -EINVAL; goto out_free_dentry; } @@ -934,19 +923,15 @@ static int load_elf_binary(struct linux_binprm *bprm) #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES retval = arch_setup_additional_pages(bprm, !!elf_interpreter); - if (retval < 0) { - send_sig(SIGKILL, current, 0); + if (retval < 0) goto out; - } #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ install_exec_creds(bprm); retval = create_elf_tables(bprm, &loc->elf_ex, load_addr, interp_load_addr); - if (retval < 0) { - send_sig(SIGKILL, current, 0); + if (retval < 0) goto out; - } /* N.B. passed_fileno might not be initialized? */ current->mm->end_code = end_code; current->mm->start_code = start_code; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index fe2a643..d3634bf 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -317,8 +317,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) goto error; /* there's now no turning back... the old userspace image is dead, - * defunct, deceased, etc. after this point we have to exit via - * error_kill */ + * defunct, deceased, etc. + */ set_personality(PER_LINUX_FDPIC); if (elf_read_implies_exec(&exec_params.hdr, executable_stack)) current->personality |= READ_IMPLIES_EXEC; @@ -343,24 +343,22 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) retval = setup_arg_pages(bprm, current->mm->start_stack, executable_stack); - if (retval < 0) { - send_sig(SIGKILL, current, 0); - goto error_kill; - } + if (retval < 0) + goto error; #endif /* load the executable and interpreter into memory */ retval = elf_fdpic_map_file(&exec_params, bprm->file, current->mm, "executable"); if (retval < 0) - goto error_kill; + goto error; if (interpreter_name) { retval = elf_fdpic_map_file(&interp_params, interpreter, current->mm, "interpreter"); if (retval < 0) { printk(KERN_ERR "Unable to load interpreter\n"); - goto error_kill; + goto error; } allow_write_access(interpreter); @@ -397,7 +395,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) if (IS_ERR_VALUE(current->mm->start_brk)) { retval = current->mm->start_brk; current->mm->start_brk = 0; - goto error_kill; + goto error; } current->mm->brk = current->mm->start_brk; @@ -410,7 +408,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) install_exec_creds(bprm); if (create_elf_fdpic_tables(bprm, current->mm, &exec_params, &interp_params) < 0) - goto error_kill; + goto error; kdebug("- start_code %lx", current->mm->start_code); kdebug("- end_code %lx", current->mm->end_code); @@ -449,12 +447,6 @@ error: kfree(interp_params.phdrs); kfree(interp_params.loadmap); return retval; - - /* unrecoverable error - kill the process */ -error_kill: - send_sig(SIGSEGV, current, 0); - goto error; - } /*****************************************************************************/ diff --git a/fs/exec.c b/fs/exec.c index a2b42a9..7302b75 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1372,18 +1372,23 @@ int search_binary_handler(struct linux_binprm *bprm) read_unlock(&binfmt_lock); bprm->recursion_depth++; retval = fmt->load_binary(bprm); + read_lock(&binfmt_lock); + put_binfmt(fmt); bprm->recursion_depth--; - if (retval >= 0 || retval != -ENOEXEC || - bprm->mm == NULL || bprm->file == NULL) { - put_binfmt(fmt); + if (retval < 0 && !bprm->mm) { + /* we got to flush_old_exec() and failed after it */ + read_unlock(&binfmt_lock); + force_sigsegv(SIGSEGV, current); + return retval; + } + if (retval != -ENOEXEC || !bprm->file) { + read_unlock(&binfmt_lock); return retval; } - read_lock(&binfmt_lock); - put_binfmt(fmt); } read_unlock(&binfmt_lock); - if (need_retry && retval == -ENOEXEC) { + if (need_retry) { if (printable(bprm->buf[0]) && printable(bprm->buf[1]) && printable(bprm->buf[2]) && printable(bprm->buf[3])) return retval; -- cgit v0.10.2 From 1fa97e8b1f327059aa98089abd8c3378cdf43017 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 7 May 2014 20:47:49 -0400 Subject: constify file_inode() Signed-off-by: Al Viro diff --git a/include/linux/fs.h b/include/linux/fs.h index 94187721..75bdd51 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1874,7 +1874,7 @@ extern int current_umask(void); extern void ihold(struct inode * inode); extern void iput(struct inode *); -static inline struct inode *file_inode(struct file *f) +static inline struct inode *file_inode(const struct file *f) { return f->f_inode; } -- cgit v0.10.2 From 4e07ad6406d8137cc5aa1317568408c461ee8ce8 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Thu, 14 Aug 2014 15:25:10 +0400 Subject: vfs: fix typo in s_op->alloc_inode() documentation The function which calls s_op->alloc_inode() is not inode_alloc(), but instead alloc_inode() which lives in fs/inode.c . The typo was there from the beginning from 5ea626aa (VFS: update documentation, 2005) - there was no standalone inode_alloc() for the whole kernel history. Cc: Pekka Enberg Signed-off-by: Kirill Smelkov Signed-off-by: Al Viro diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 61d65cc..02a766c 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -237,7 +237,7 @@ noted. This means that most methods can block safely. All methods are only called from a process context (i.e. not from an interrupt handler or bottom half). - alloc_inode: this method is called by inode_alloc() to allocate memory + alloc_inode: this method is called by alloc_inode() to allocate memory for struct inode and initialize it. If this function is not defined, a simple 'struct inode' is allocated. Normally alloc_inode will be used to allocate a larger structure which -- cgit v0.10.2 From 99358a1ca53e8e6ce09423500191396f0e6584d2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 1 Aug 2014 20:13:40 +0100 Subject: [jffs2] kill wbuf_queued/wbuf_dwork_lock schedule_delayed_work() happening when the work is already pending is a cheap no-op. Don't bother with ->wbuf_queued logics - it's both broken (cancelling ->wbuf_dwork leaves it set, as spotted by Jeff Harris) and pointless. It's cheaper to let schedule_delayed_work() handle that case. Reported-by: Jeff Harris Tested-by: Jeff Harris Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h index 413ef89..046fee8 100644 --- a/fs/jffs2/jffs2_fs_sb.h +++ b/fs/jffs2/jffs2_fs_sb.h @@ -134,8 +134,6 @@ struct jffs2_sb_info { struct rw_semaphore wbuf_sem; /* Protects the write buffer */ struct delayed_work wbuf_dwork; /* write-buffer write-out work */ - int wbuf_queued; /* non-zero delayed work is queued */ - spinlock_t wbuf_dwork_lock; /* protects wbuf_dwork and and wbuf_queued */ unsigned char *oobbuf; int oobavail; /* How many bytes are available for JFFS2 in OOB */ diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index a6597d6..09ed551 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -1162,10 +1162,6 @@ static void delayed_wbuf_sync(struct work_struct *work) struct jffs2_sb_info *c = work_to_sb(work); struct super_block *sb = OFNI_BS_2SFFJ(c); - spin_lock(&c->wbuf_dwork_lock); - c->wbuf_queued = 0; - spin_unlock(&c->wbuf_dwork_lock); - if (!(sb->s_flags & MS_RDONLY)) { jffs2_dbg(1, "%s()\n", __func__); jffs2_flush_wbuf_gc(c, 0); @@ -1180,14 +1176,9 @@ void jffs2_dirty_trigger(struct jffs2_sb_info *c) if (sb->s_flags & MS_RDONLY) return; - spin_lock(&c->wbuf_dwork_lock); - if (!c->wbuf_queued) { + delay = msecs_to_jiffies(dirty_writeback_interval * 10); + if (queue_delayed_work(system_long_wq, &c->wbuf_dwork, delay)) jffs2_dbg(1, "%s()\n", __func__); - delay = msecs_to_jiffies(dirty_writeback_interval * 10); - queue_delayed_work(system_long_wq, &c->wbuf_dwork, delay); - c->wbuf_queued = 1; - } - spin_unlock(&c->wbuf_dwork_lock); } int jffs2_nand_flash_setup(struct jffs2_sb_info *c) @@ -1211,7 +1202,6 @@ int jffs2_nand_flash_setup(struct jffs2_sb_info *c) /* Initialise write buffer */ init_rwsem(&c->wbuf_sem); - spin_lock_init(&c->wbuf_dwork_lock); INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->writesize; c->wbuf_ofs = 0xFFFFFFFF; @@ -1251,7 +1241,6 @@ int jffs2_dataflash_setup(struct jffs2_sb_info *c) { /* Initialize write buffer */ init_rwsem(&c->wbuf_sem); - spin_lock_init(&c->wbuf_dwork_lock); INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->erasesize; @@ -1311,7 +1300,6 @@ int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) { /* Initialize write buffer */ init_rwsem(&c->wbuf_sem); - spin_lock_init(&c->wbuf_dwork_lock); INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->writesize; @@ -1346,7 +1334,6 @@ int jffs2_ubivol_setup(struct jffs2_sb_info *c) { return 0; init_rwsem(&c->wbuf_sem); - spin_lock_init(&c->wbuf_dwork_lock); INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->writesize; -- cgit v0.10.2 From b8314f9303a985354f445763960c0db2d7948891 Mon Sep 17 00:00:00 2001 From: Daeseok Youn Date: Mon, 11 Aug 2014 11:46:53 +0900 Subject: dcache: Fix no spaces at the start of a line in dcache.c Fixed coding style in dcache.c Signed-off-by: Daeseok Youn Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index 21eee4c..8221faae 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2093,10 +2093,10 @@ struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name) struct dentry *dentry; unsigned seq; - do { - seq = read_seqbegin(&rename_lock); - dentry = __d_lookup(parent, name); - if (dentry) + do { + seq = read_seqbegin(&rename_lock); + dentry = __d_lookup(parent, name); + if (dentry) break; } while (read_seqretry(&rename_lock, seq)); return dentry; -- cgit v0.10.2 From 475d0db742e3755c6b267f48577ff7cbb7dfda0d Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 17 May 2014 20:56:38 +0900 Subject: fs: Fix theoretical division by 0 in super_cache_scan(). total_objects could be 0 and is used as a denom. While total_objects is a "long", total_objects == 0 unlikely happens for 3.12 and later kernels because 32-bit architectures would not be able to hold (1 << 32) objects. However, total_objects == 0 may happen for kernels between 3.1 and 3.11 because total_objects in prune_super() was an "int" and (e.g.) x86_64 architecture might be able to hold (1 << 32) objects. Signed-off-by: Tetsuo Handa Reviewed-by: Christoph Hellwig Cc: stable # 3.1+ Signed-off-by: Al Viro diff --git a/fs/super.c b/fs/super.c index b9a214d..6f8c954 100644 --- a/fs/super.c +++ b/fs/super.c @@ -80,6 +80,8 @@ static unsigned long super_cache_scan(struct shrinker *shrink, inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid); dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid); total_objects = dentries + inodes + fs_objects + 1; + if (!total_objects) + total_objects = 1; /* proportion the scan between the caches */ dentries = mult_frac(sc->nr_to_scan, dentries, total_objects); -- cgit v0.10.2 From c35e02480014f7a86e264a2fda39a568690163da Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 1 Aug 2014 09:27:22 -0400 Subject: Add copy_to_iter(), copy_from_iter() and iov_iter_zero() For DAX, we want to be able to copy between iovecs and kernel addresses that don't necessarily have a struct page. This is a fairly simple rearrangement for bvec iters to kmap the pages outside and pass them in, but for user iovecs it gets more complicated because we might try various different ways to kmap the memory. Duplicating the existing logic works out best in this case. We need to be able to write zeroes to an iovec for reads from unwritten ranges in a file. This is performed by the new iov_iter_zero() function, again patterned after the existing code that handles iovec iterators. [AV: and export the buggers...] Signed-off-by: Matthew Wilcox Signed-off-by: Al Viro diff --git a/include/linux/uio.h b/include/linux/uio.h index 290fbf0..9b15814 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -80,6 +80,9 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); +size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i); +size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); +size_t iov_iter_zero(size_t bytes, struct iov_iter *); unsigned long iov_iter_alignment(const struct iov_iter *i); void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, unsigned long nr_segs, size_t count); diff --git a/mm/iov_iter.c b/mm/iov_iter.c index 9a09f20..eafcf60 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -4,6 +4,96 @@ #include #include +static size_t copy_to_iter_iovec(void *from, size_t bytes, struct iov_iter *i) +{ + size_t skip, copy, left, wanted; + const struct iovec *iov; + char __user *buf; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + iov = i->iov; + skip = i->iov_offset; + buf = iov->iov_base + skip; + copy = min(bytes, iov->iov_len - skip); + + left = __copy_to_user(buf, from, copy); + copy -= left; + skip += copy; + from += copy; + bytes -= copy; + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_to_user(buf, from, copy); + copy -= left; + skip = copy; + from += copy; + bytes -= copy; + } + + if (skip == iov->iov_len) { + iov++; + skip = 0; + } + i->count -= wanted - bytes; + i->nr_segs -= iov - i->iov; + i->iov = iov; + i->iov_offset = skip; + return wanted - bytes; +} + +static size_t copy_from_iter_iovec(void *to, size_t bytes, struct iov_iter *i) +{ + size_t skip, copy, left, wanted; + const struct iovec *iov; + char __user *buf; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + iov = i->iov; + skip = i->iov_offset; + buf = iov->iov_base + skip; + copy = min(bytes, iov->iov_len - skip); + + left = __copy_from_user(to, buf, copy); + copy -= left; + skip += copy; + to += copy; + bytes -= copy; + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_from_user(to, buf, copy); + copy -= left; + skip = copy; + to += copy; + bytes -= copy; + } + + if (skip == iov->iov_len) { + iov++; + skip = 0; + } + i->count -= wanted - bytes; + i->nr_segs -= iov - i->iov; + i->iov = iov; + i->iov_offset = skip; + return wanted - bytes; +} + static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { @@ -166,6 +256,50 @@ done: return wanted - bytes; } +static size_t zero_iovec(size_t bytes, struct iov_iter *i) +{ + size_t skip, copy, left, wanted; + const struct iovec *iov; + char __user *buf; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + iov = i->iov; + skip = i->iov_offset; + buf = iov->iov_base + skip; + copy = min(bytes, iov->iov_len - skip); + + left = __clear_user(buf, copy); + copy -= left; + skip += copy; + bytes -= copy; + + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __clear_user(buf, copy); + copy -= left; + skip = copy; + bytes -= copy; + } + + if (skip == iov->iov_len) { + iov++; + skip = 0; + } + i->count -= wanted - bytes; + i->nr_segs -= iov - i->iov; + i->iov = iov; + i->iov_offset = skip; + return wanted - bytes; +} + static size_t __iovec_copy_from_user_inatomic(char *vaddr, const struct iovec *iov, size_t base, size_t bytes) { @@ -414,12 +548,17 @@ static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t kunmap_atomic(to); } -static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t bytes, - struct iov_iter *i) +static void memzero_page(struct page *page, size_t offset, size_t len) +{ + char *addr = kmap_atomic(page); + memset(addr + offset, 0, len); + kunmap_atomic(addr); +} + +static size_t copy_to_iter_bvec(void *from, size_t bytes, struct iov_iter *i) { size_t skip, copy, wanted; const struct bio_vec *bvec; - void *kaddr, *from; if (unlikely(bytes > i->count)) bytes = i->count; @@ -432,8 +571,6 @@ static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t by skip = i->iov_offset; copy = min_t(size_t, bytes, bvec->bv_len - skip); - kaddr = kmap_atomic(page); - from = kaddr + offset; memcpy_to_page(bvec->bv_page, skip + bvec->bv_offset, from, copy); skip += copy; from += copy; @@ -446,7 +583,6 @@ static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t by from += copy; bytes -= copy; } - kunmap_atomic(kaddr); if (skip == bvec->bv_len) { bvec++; skip = 0; @@ -458,12 +594,10 @@ static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t by return wanted - bytes; } -static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t bytes, - struct iov_iter *i) +static size_t copy_from_iter_bvec(void *to, size_t bytes, struct iov_iter *i) { size_t skip, copy, wanted; const struct bio_vec *bvec; - void *kaddr, *to; if (unlikely(bytes > i->count)) bytes = i->count; @@ -475,10 +609,6 @@ static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t bvec = i->bvec; skip = i->iov_offset; - kaddr = kmap_atomic(page); - - to = kaddr + offset; - copy = min(bytes, bvec->bv_len - skip); memcpy_from_page(to, bvec->bv_page, bvec->bv_offset + skip, copy); @@ -495,7 +625,6 @@ static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t to += copy; bytes -= copy; } - kunmap_atomic(kaddr); if (skip == bvec->bv_len) { bvec++; skip = 0; @@ -507,6 +636,61 @@ static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t return wanted; } +static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, + size_t bytes, struct iov_iter *i) +{ + void *kaddr = kmap_atomic(page); + size_t wanted = copy_to_iter_bvec(kaddr + offset, bytes, i); + kunmap_atomic(kaddr); + return wanted; +} + +static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, + size_t bytes, struct iov_iter *i) +{ + void *kaddr = kmap_atomic(page); + size_t wanted = copy_from_iter_bvec(kaddr + offset, bytes, i); + kunmap_atomic(kaddr); + return wanted; +} + +static size_t zero_bvec(size_t bytes, struct iov_iter *i) +{ + size_t skip, copy, wanted; + const struct bio_vec *bvec; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + bvec = i->bvec; + skip = i->iov_offset; + copy = min_t(size_t, bytes, bvec->bv_len - skip); + + memzero_page(bvec->bv_page, skip + bvec->bv_offset, copy); + skip += copy; + bytes -= copy; + while (bytes) { + bvec++; + copy = min(bytes, (size_t)bvec->bv_len); + memzero_page(bvec->bv_page, bvec->bv_offset, copy); + skip = copy; + bytes -= copy; + } + if (skip == bvec->bv_len) { + bvec++; + skip = 0; + } + i->count -= wanted - bytes; + i->nr_segs -= bvec - i->bvec; + i->bvec = bvec; + i->iov_offset = skip; + return wanted - bytes; +} + static size_t copy_from_user_bvec(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes) { @@ -672,6 +856,34 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, } EXPORT_SYMBOL(copy_page_from_iter); +size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i) +{ + if (i->type & ITER_BVEC) + return copy_to_iter_bvec(addr, bytes, i); + else + return copy_to_iter_iovec(addr, bytes, i); +} +EXPORT_SYMBOL(copy_to_iter); + +size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) +{ + if (i->type & ITER_BVEC) + return copy_from_iter_bvec(addr, bytes, i); + else + return copy_from_iter_iovec(addr, bytes, i); +} +EXPORT_SYMBOL(copy_from_iter); + +size_t iov_iter_zero(size_t bytes, struct iov_iter *i) +{ + if (i->type & ITER_BVEC) { + return zero_bvec(bytes, i); + } else { + return zero_iovec(bytes, i); + } +} +EXPORT_SYMBOL(iov_iter_zero); + size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes) { -- cgit v0.10.2 From c2ca0fcd202863b14bd041a7fece2e789926c225 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 27 Jul 2014 13:00:41 -0400 Subject: fs: make cont_expand_zero interruptible This patch makes it possible to kill a process looping in cont_expand_zero. A process may spend a lot of time in this function, so it is desirable to be able to kill it. It happened to me that I wanted to copy a piece data from the disk to a file. By mistake, I used the "seek" parameter to dd instead of "skip". Due to the "seek" parameter, dd attempted to extend the file and became stuck doing so - the only possibility was to reset the machine or wait many hours until the filesystem runs out of space and cont_expand_zero fails. We need this patch to be able to terminate the process. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/fs/buffer.c b/fs/buffer.c index 3588a80..c8a1c01 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2318,6 +2318,11 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping, err = 0; balance_dirty_pages_ratelimited(mapping); + + if (unlikely(fatal_signal_pending(current))) { + err = -EINTR; + goto out; + } } /* page covers the boundary, find the boundary offset */ -- cgit v0.10.2 From 35c265e008b2c4e56765f07d47750d13f95a55c1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Aug 2014 20:25:34 -0400 Subject: cifs: switch to use of %p[dD] Signed-off-by: Al Viro diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 6cbd9c6..0736406 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -461,8 +461,8 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, xid = get_xid(); - cifs_dbg(FYI, "parent inode = 0x%p name is: %s and dentry = 0x%p\n", - inode, direntry->d_name.name, direntry); + cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n", + inode, direntry, direntry); tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb)); if (IS_ERR(tlink)) { @@ -540,8 +540,8 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, struct cifs_fid fid; __u32 oplock; - cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p\n", - inode, direntry->d_name.name, direntry); + cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %pd and dentry = 0x%p\n", + inode, direntry, direntry); tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb)); rc = PTR_ERR(tlink); @@ -713,8 +713,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, xid = get_xid(); - cifs_dbg(FYI, "parent inode = 0x%p name is: %s and dentry = 0x%p\n", - parent_dir_inode, direntry->d_name.name, direntry); + cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n", + parent_dir_inode, direntry, direntry); /* check whether path exists */ @@ -833,7 +833,7 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags) { int rc = 0; - cifs_dbg(FYI, "In cifs d_delete, name = %s\n", direntry->d_name.name); + cifs_dbg(FYI, "In cifs d_delete, name = %pd\n", direntry); return rc; } */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 7c018a1..dc3c7e6 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1650,8 +1650,8 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data, cifs_sb = CIFS_SB(dentry->d_sb); - cifs_dbg(FYI, "write %zd bytes to offset %lld of %s\n", - write_size, *offset, dentry->d_name.name); + cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n", + write_size, *offset, dentry); tcon = tlink_tcon(open_file->tlink); server = tcon->ses->server; @@ -2273,8 +2273,8 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, xid = get_xid(); - cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n", - file->f_path.dentry->d_name.name, datasync); + cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", + file, datasync); if (!CIFS_CACHE_READ(CIFS_I(inode))) { rc = cifs_zap_mapping(inode); @@ -2315,8 +2315,8 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) xid = get_xid(); - cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n", - file->f_path.dentry->d_name.name, datasync); + cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", + file, datasync); tcon = tlink_tcon(smbfile->tlink); if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 7899a40..8fd4ee8 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1419,8 +1419,8 @@ cifs_posix_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode, d_instantiate(dentry, newinode); #ifdef CONFIG_CIFS_DEBUG2 - cifs_dbg(FYI, "instantiated dentry %p %s to inode %p\n", - dentry, dentry->d_name.name, newinode); + cifs_dbg(FYI, "instantiated dentry %p %pd to inode %p\n", + dentry, dentry, newinode); if (newinode->i_nlink != 2) cifs_dbg(FYI, "unexpected number of links %d\n", @@ -2111,8 +2111,8 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) struct cifs_unix_set_info_args *args = NULL; struct cifsFileInfo *open_file; - cifs_dbg(FYI, "setattr_unix on file %s attrs->ia_valid=0x%x\n", - direntry->d_name.name, attrs->ia_valid); + cifs_dbg(FYI, "setattr_unix on file %pd attrs->ia_valid=0x%x\n", + direntry, attrs->ia_valid); xid = get_xid(); @@ -2254,8 +2254,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) xid = get_xid(); - cifs_dbg(FYI, "setattr on file %s attrs->iavalid 0x%x\n", - direntry->d_name.name, attrs->ia_valid); + cifs_dbg(FYI, "setattr on file %pd attrs->iavalid 0x%x\n", + direntry, attrs->ia_valid); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) attrs->ia_valid |= ATTR_FORCE; -- cgit v0.10.2 From 4b8e992392a2468f90ae8b5b9f3e95c5f54a61c9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Aug 2014 20:17:38 -0400 Subject: 9p: switch to %p[dD] Signed-off-by: Al Viro diff --git a/fs/9p/fid.c b/fs/9p/fid.c index d51ec9f..47db55a 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -65,8 +65,8 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any) { struct p9_fid *fid, *ret; - p9_debug(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n", - dentry->d_name.name, dentry, from_kuid(&init_user_ns, uid), + p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p) uid %d any %d\n", + dentry, dentry, from_kuid(&init_user_ns, uid), any); ret = NULL; /* we'll recheck under lock if there's anything to look in */ diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index cc1cfae..eb14e05 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -266,8 +266,8 @@ v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) * Now that we do caching with cache mode enabled, We need * to support direct IO */ - p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) off/no(%lld/%lu) EINVAL\n", - iocb->ki_filp->f_path.dentry->d_name.name, + p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%pD) off/no(%lld/%lu) EINVAL\n", + iocb->ki_filp, (long long)pos, iter->nr_segs); return -EINVAL; diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index b03dd23..a345b2d 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -49,8 +49,8 @@ */ static int v9fs_cached_dentry_delete(const struct dentry *dentry) { - p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n", - dentry->d_name.name, dentry); + p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p)\n", + dentry, dentry); /* Don't cache negative dentries */ if (!dentry->d_inode) @@ -67,8 +67,8 @@ static int v9fs_cached_dentry_delete(const struct dentry *dentry) static void v9fs_dentry_release(struct dentry *dentry) { struct hlist_node *p, *n; - p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n", - dentry->d_name.name, dentry); + p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p)\n", + dentry, dentry); hlist_for_each_safe(p, n, (struct hlist_head *)&dentry->d_fsdata) p9_client_clunk(hlist_entry(p, struct p9_fid, dlist)); dentry->d_fsdata = NULL; diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 0b3bfa3..4f11510 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -116,7 +116,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) int reclen = 0; struct p9_rdir *rdir; - p9_debug(P9_DEBUG_VFS, "name %s\n", file->f_path.dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "name %pD\n", file); fid = file->private_data; buflen = fid->clnt->msize - P9_IOHDRSZ; @@ -172,7 +172,7 @@ static int v9fs_dir_readdir_dotl(struct file *file, struct dir_context *ctx) struct p9_rdir *rdir; struct p9_dirent curdirent; - p9_debug(P9_DEBUG_VFS, "name %s\n", file->f_path.dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "name %pD\n", file); fid = file->private_data; buflen = fid->clnt->msize - P9_READDIRHDRSZ; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 520c11c..5594505 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -301,8 +301,8 @@ static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl) struct inode *inode = file_inode(filp); int ret = -ENOLCK; - p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", - filp, cmd, fl, filp->f_path.dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %pD\n", + filp, cmd, fl, filp); /* No mandatory locks */ if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) @@ -337,8 +337,8 @@ static int v9fs_file_flock_dotl(struct file *filp, int cmd, struct inode *inode = file_inode(filp); int ret = -ENOLCK; - p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", - filp, cmd, fl, filp->f_path.dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %pD\n", + filp, cmd, fl, filp); /* No mandatory locks */ if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 7fa4f7a..296482f 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -648,7 +648,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, struct p9_fid *dfid, *ofid, *fid; struct inode *inode; - p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); err = 0; ofid = NULL; @@ -755,7 +755,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode struct p9_fid *fid; struct v9fs_session_info *v9ses; - p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); err = 0; v9ses = v9fs_inode2v9ses(dir); perm = unixmode2p9mode(v9ses, mode | S_IFDIR); @@ -791,8 +791,8 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, struct inode *inode; char *name; - p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p flags: %x\n", - dir, dentry->d_name.name, dentry, flags); + p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%pd) %p flags: %x\n", + dir, dentry, dentry, flags); if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); @@ -1239,7 +1239,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) struct p9_fid *fid; struct p9_wstat *st; - p9_debug(P9_DEBUG_VFS, " %s\n", dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, " %pd\n", dentry); retval = -EPERM; v9ses = v9fs_dentry2v9ses(dentry); fid = v9fs_fid_lookup(dentry); @@ -1262,8 +1262,8 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) retval = min(strlen(st->extension)+1, (size_t)buflen); memcpy(buffer, st->extension, retval); - p9_debug(P9_DEBUG_VFS, "%s -> %s (%.*s)\n", - dentry->d_name.name, st->extension, buflen, buffer); + p9_debug(P9_DEBUG_VFS, "%pd -> %s (%.*s)\n", + dentry, st->extension, buflen, buffer); done: p9stat_free(st); @@ -1283,7 +1283,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd) int len = 0; char *link = __getname(); - p9_debug(P9_DEBUG_VFS, "%s\n", dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "%pd\n", dentry); if (!link) link = ERR_PTR(-ENOMEM); @@ -1314,8 +1314,8 @@ v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) { char *s = nd_get_link(nd); - p9_debug(P9_DEBUG_VFS, " %s %s\n", - dentry->d_name.name, IS_ERR(s) ? "" : s); + p9_debug(P9_DEBUG_VFS, " %pd %s\n", + dentry, IS_ERR(s) ? "" : s); if (!IS_ERR(s)) __putname(s); } @@ -1364,8 +1364,8 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, static int v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - p9_debug(P9_DEBUG_VFS, " %lu,%s,%s\n", - dir->i_ino, dentry->d_name.name, symname); + p9_debug(P9_DEBUG_VFS, " %lu,%pd,%s\n", + dir->i_ino, dentry, symname); return v9fs_vfs_mkspecial(dir, dentry, P9_DMSYMLINK, symname); } @@ -1386,8 +1386,8 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, char *name; struct p9_fid *oldfid; - p9_debug(P9_DEBUG_VFS, " %lu,%s,%s\n", - dir->i_ino, dentry->d_name.name, old_dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, " %lu,%pd,%pd\n", + dir->i_ino, dentry, old_dentry); oldfid = v9fs_fid_clone(old_dentry); if (IS_ERR(oldfid)) @@ -1428,8 +1428,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rde char *name; u32 perm; - p9_debug(P9_DEBUG_VFS, " %lu,%s mode: %hx MAJOR: %u MINOR: %u\n", - dir->i_ino, dentry->d_name.name, mode, + p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %hx MAJOR: %u MINOR: %u\n", + dir->i_ino, dentry, mode, MAJOR(rdev), MINOR(rdev)); if (!new_valid_dev(rdev)) diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 1fa85aa..02b64f4 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -393,7 +393,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dir_dentry; struct posix_acl *dacl = NULL, *pacl = NULL; - p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); err = 0; v9ses = v9fs_inode2v9ses(dir); @@ -767,8 +767,8 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, struct p9_fid *dfid, *oldfid; struct v9fs_session_info *v9ses; - p9_debug(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n", - dir->i_ino, old_dentry->d_name.name, dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "dir ino: %lu, old_name: %pd, new_name: %pd\n", + dir->i_ino, old_dentry, dentry); v9ses = v9fs_inode2v9ses(dir); dir_dentry = dentry->d_parent; @@ -917,7 +917,7 @@ v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd) char *link = __getname(); char *target; - p9_debug(P9_DEBUG_VFS, "%s\n", dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "%pd\n", dentry); if (!link) { link = ERR_PTR(-ENOMEM); -- cgit v0.10.2 From 91360b02ab483fc96a70b8c3016838f5d3725f99 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Aug 2014 11:08:22 -0400 Subject: ashmem: use vfs_llseek() Signed-off-by: Al Viro diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 713a972..ad4f579 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -339,7 +339,7 @@ static loff_t ashmem_llseek(struct file *file, loff_t offset, int origin) goto out; } - ret = asma->file->f_op->llseek(asma->file, offset, origin); + ret = vfs_llseek(asma->file, offset, origin); if (ret < 0) goto out; -- cgit v0.10.2 From 594822918de20bf3a50afbc4de65b6f2971a92db Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Aug 2014 11:28:35 -0400 Subject: vme: don't open-code fixed_size_llseek() Signed-off-by: Al Viro diff --git a/drivers/staging/vme/devices/vme_user.c b/drivers/staging/vme/devices/vme_user.c index 920e50a..cdd2ff7 100644 --- a/drivers/staging/vme/devices/vme_user.c +++ b/drivers/staging/vme/devices/vme_user.c @@ -410,41 +410,19 @@ static ssize_t vme_user_write(struct file *file, const char __user *buf, static loff_t vme_user_llseek(struct file *file, loff_t off, int whence) { - loff_t absolute = -1; unsigned int minor = MINOR(file_inode(file)->i_rdev); size_t image_size; + loff_t res; if (minor == CONTROL_MINOR) return -EINVAL; mutex_lock(&image[minor].mutex); image_size = vme_get_size(image[minor].resource); - - switch (whence) { - case SEEK_SET: - absolute = off; - break; - case SEEK_CUR: - absolute = file->f_pos + off; - break; - case SEEK_END: - absolute = image_size + off; - break; - default: - mutex_unlock(&image[minor].mutex); - return -EINVAL; - } - - if ((absolute < 0) || (absolute >= image_size)) { - mutex_unlock(&image[minor].mutex); - return -EINVAL; - } - - file->f_pos = absolute; - + res = fixed_size_llseek(file, off, whence, image_size); mutex_unlock(&image[minor].mutex); - return absolute; + return res; } /* -- cgit v0.10.2 From 2ec3a12a667847d303d4d0c0576d5ff388052b48 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Aug 2014 11:48:09 -0400 Subject: cachefiles_write_page(): switch to __kernel_write() Signed-off-by: Al Viro diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 25e745b..616db0e7 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -880,7 +880,6 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) { struct cachefiles_object *object; struct cachefiles_cache *cache; - mm_segment_t old_fs; struct file *file; struct path path; loff_t pos, eof; @@ -914,36 +913,27 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) if (IS_ERR(file)) { ret = PTR_ERR(file); } else { - ret = -EIO; - if (file->f_op->write) { - pos = (loff_t) page->index << PAGE_SHIFT; - - /* we mustn't write more data than we have, so we have - * to beware of a partial page at EOF */ - eof = object->fscache.store_limit_l; - len = PAGE_SIZE; - if (eof & ~PAGE_MASK) { - ASSERTCMP(pos, <, eof); - if (eof - pos < PAGE_SIZE) { - _debug("cut short %llx to %llx", - pos, eof); - len = eof - pos; - ASSERTCMP(pos + len, ==, eof); - } + pos = (loff_t) page->index << PAGE_SHIFT; + + /* we mustn't write more data than we have, so we have + * to beware of a partial page at EOF */ + eof = object->fscache.store_limit_l; + len = PAGE_SIZE; + if (eof & ~PAGE_MASK) { + ASSERTCMP(pos, <, eof); + if (eof - pos < PAGE_SIZE) { + _debug("cut short %llx to %llx", + pos, eof); + len = eof - pos; + ASSERTCMP(pos + len, ==, eof); } - - data = kmap(page); - file_start_write(file); - old_fs = get_fs(); - set_fs(KERNEL_DS); - ret = file->f_op->write( - file, (const void __user *) data, len, &pos); - set_fs(old_fs); - kunmap(page); - file_end_write(file); - if (ret != len) - ret = -EIO; } + + data = kmap(page); + ret = __kernel_write(file, data, len, &pos); + kunmap(page); + if (ret != len) + ret = -EIO; fput(file); } diff --git a/fs/read_write.c b/fs/read_write.c index 009d854..7d9318c 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -513,6 +513,8 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t return ret; } +EXPORT_SYMBOL(__kernel_write); + ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { ssize_t ret; -- cgit v0.10.2 From 1a37f5ecb707aded73133e14b58102343de56bbd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Aug 2014 12:06:18 -0400 Subject: carma-fpga: switch to fixed_size_llseek() Signed-off-by: Al Viro diff --git a/drivers/misc/carma/carma-fpga-program.c b/drivers/misc/carma/carma-fpga-program.c index 7be8983..f390dee 100644 --- a/drivers/misc/carma/carma-fpga-program.c +++ b/drivers/misc/carma/carma-fpga-program.c @@ -767,26 +767,7 @@ static loff_t fpga_llseek(struct file *filp, loff_t offset, int origin) if ((filp->f_flags & O_ACCMODE) != O_RDONLY) return -EINVAL; - switch (origin) { - case SEEK_SET: /* seek relative to the beginning of the file */ - newpos = offset; - break; - case SEEK_CUR: /* seek relative to current position in the file */ - newpos = filp->f_pos + offset; - break; - case SEEK_END: /* seek relative to the end of the file */ - newpos = priv->fw_size - offset; - break; - default: - return -EINVAL; - } - - /* check for sanity */ - if (newpos > priv->fw_size) - return -EINVAL; - - filp->f_pos = newpos; - return newpos; + return fixed_size_llseek(file, offset, origin, priv->fw_size); } static const struct file_operations fpga_fops = { -- cgit v0.10.2 From d88c242623e90c99864317baae1e192bece2af57 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Aug 2014 12:08:37 -0400 Subject: carma-fpga: switch to simple_read_from_buffer() Signed-off-by: Al Viro diff --git a/drivers/misc/carma/carma-fpga-program.c b/drivers/misc/carma/carma-fpga-program.c index f390dee..7e97e53 100644 --- a/drivers/misc/carma/carma-fpga-program.c +++ b/drivers/misc/carma/carma-fpga-program.c @@ -749,13 +749,8 @@ static ssize_t fpga_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct fpga_dev *priv = filp->private_data; - - count = min_t(size_t, priv->bytes - *f_pos, count); - if (copy_to_user(buf, priv->vb.vaddr + *f_pos, count)) - return -EFAULT; - - *f_pos += count; - return count; + return simple_read_from_buffer(buf, count, ppos, + priv->vb.vaddr, priv->bytes); } static loff_t fpga_llseek(struct file *filp, loff_t offset, int origin) -- cgit v0.10.2 From 8e3fb059ae7c246ff906c3b988d0de1d66809e84 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Aug 2014 14:42:04 -0400 Subject: rsxx debugfs inanity check with the author of that horror... Signed-off-by: Al Viro diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 820b400..3265ce9 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -62,12 +62,6 @@ static DEFINE_SPINLOCK(rsxx_ida_lock); /* --------------------Debugfs Setup ------------------- */ -struct rsxx_cram { - u32 f_pos; - u32 offset; - void *i_private; -}; - static int rsxx_attr_pci_regs_show(struct seq_file *m, void *p) { struct rsxx_cardinfo *card = m->private; @@ -184,93 +178,50 @@ static int rsxx_attr_pci_regs_open(struct inode *inode, struct file *file) static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct rsxx_cram *info = fp->private_data; - struct rsxx_cardinfo *card = info->i_private; + struct rsxx_cardinfo *card = file_inode(fp)->i_private; char *buf; - int st; + ssize_t st; - buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL); + buf = kzalloc(cnt, GFP_KERNEL); if (!buf) return -ENOMEM; - info->f_pos = (u32)*ppos + info->offset; - - st = rsxx_creg_read(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1); - if (st) - return st; - - st = copy_to_user(ubuf, buf, cnt); + st = rsxx_creg_read(card, CREG_ADD_CRAM + (u32)*ppos, cnt, buf, 1); + if (!st) + st = copy_to_user(ubuf, buf, cnt); + kfree(buf); if (st) return st; - - info->offset += cnt; - - kfree(buf); - + *ppos += cnt; return cnt; } static ssize_t rsxx_cram_write(struct file *fp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct rsxx_cram *info = fp->private_data; - struct rsxx_cardinfo *card = info->i_private; + struct rsxx_cardinfo *card = file_inode(fp)->i_private; char *buf; - int st; + ssize_t st; - buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL); + buf = kzalloc(cnt, GFP_KERNEL); if (!buf) return -ENOMEM; st = copy_from_user(buf, ubuf, cnt); + if (!st) + st = rsxx_creg_write(card, CREG_ADD_CRAM + (u32)*ppos, cnt, + buf, 1); + kfree(buf); if (st) return st; - - info->f_pos = (u32)*ppos + info->offset; - - st = rsxx_creg_write(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1); - if (st) - return st; - - info->offset += cnt; - - kfree(buf); - + *ppos += cnt; return cnt; } -static int rsxx_cram_open(struct inode *inode, struct file *file) -{ - struct rsxx_cram *info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return -ENOMEM; - - info->i_private = inode->i_private; - info->f_pos = file->f_pos; - file->private_data = info; - - return 0; -} - -static int rsxx_cram_release(struct inode *inode, struct file *file) -{ - struct rsxx_cram *info = file->private_data; - - if (!info) - return 0; - - kfree(info); - file->private_data = NULL; - - return 0; -} - static const struct file_operations debugfs_cram_fops = { .owner = THIS_MODULE, - .open = rsxx_cram_open, .read = rsxx_cram_read, .write = rsxx_cram_write, - .release = rsxx_cram_release, }; static const struct file_operations debugfs_stats_fops = { -- cgit v0.10.2 From a1f6dbac629e36f89a1332b5ae773b831c136ee9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Aug 2014 11:05:50 -0400 Subject: dma-buf: don't open-code atomic_long_read() ... not to mention that even atomic_long_read() is too low-level here - there's file_count(). Signed-off-by: Al Viro diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index f3014c4..5be225c 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -799,7 +799,7 @@ static int dma_buf_describe(struct seq_file *s) seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\n", buf_obj->size, buf_obj->file->f_flags, buf_obj->file->f_mode, - (long)(buf_obj->file->f_count.counter), + file_count(buf_obj->file), buf_obj->exp_name); seq_puts(s, "\tAttached Devices:\n"); -- cgit v0.10.2 From 13ba33e89991f6c020a36cfac0001dd54281e67c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 18 Aug 2014 10:04:12 -0400 Subject: switch /dev/zero and /dev/full to ->read_iter() Signed-off-by: Al Viro diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 917403f..524b707 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -622,53 +622,23 @@ static ssize_t splice_write_null(struct pipe_inode_info *pipe, struct file *out, return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null); } -static ssize_t read_zero(struct file *file, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t read_iter_zero(struct kiocb *iocb, struct iov_iter *iter) { - size_t written; - - if (!count) - return 0; - - if (!access_ok(VERIFY_WRITE, buf, count)) - return -EFAULT; - - written = 0; - while (count) { - unsigned long unwritten; - size_t chunk = count; + size_t written = 0; + while (iov_iter_count(iter)) { + size_t chunk = iov_iter_count(iter), n; if (chunk > PAGE_SIZE) chunk = PAGE_SIZE; /* Just for latency reasons */ - unwritten = __clear_user(buf, chunk); - written += chunk - unwritten; - if (unwritten) - break; + n = iov_iter_zero(chunk, iter); + if (!n && iov_iter_count(iter)) + return written ? written : -EFAULT; + written += n; if (signal_pending(current)) return written ? written : -ERESTARTSYS; - buf += chunk; - count -= chunk; cond_resched(); } - return written ? written : -EFAULT; -} - -static ssize_t aio_read_zero(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - size_t written = 0; - unsigned long i; - ssize_t ret; - - for (i = 0; i < nr_segs; i++) { - ret = read_zero(iocb->ki_filp, iov[i].iov_base, iov[i].iov_len, - &pos); - if (ret < 0) - break; - written += ret; - } - - return written ? written : -EFAULT; + return written; } static int mmap_zero(struct file *file, struct vm_area_struct *vma) @@ -738,7 +708,6 @@ static int open_port(struct inode *inode, struct file *filp) #define zero_lseek null_lseek #define full_lseek null_lseek #define write_zero write_null -#define read_full read_zero #define aio_write_zero aio_write_null #define open_mem open_port #define open_kmem open_mem @@ -783,9 +752,9 @@ static const struct file_operations port_fops = { static const struct file_operations zero_fops = { .llseek = zero_lseek, - .read = read_zero, + .read = new_sync_read, .write = write_zero, - .aio_read = aio_read_zero, + .read_iter = read_iter_zero, .aio_write = aio_write_zero, .mmap = mmap_zero, }; @@ -802,7 +771,8 @@ static struct backing_dev_info zero_bdi = { static const struct file_operations full_fops = { .llseek = full_lseek, - .read = read_full, + .read = new_sync_read, + .read_iter = read_iter_zero, .write = write_full, }; -- cgit v0.10.2 From 512b2268156a4e15ebf897f9a883bdee153a54b7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Aug 2014 11:28:14 -0400 Subject: switch hci_vhci to ->write_iter() Signed-off-by: Al Viro diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index 5bb5872..6653473 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -160,13 +160,11 @@ static int vhci_create_device(struct vhci_data *data, __u8 opcode) } static inline ssize_t vhci_get_user(struct vhci_data *data, - const struct iovec *iov, - unsigned long count) + struct iov_iter *from) { - size_t len = iov_length(iov, count); + size_t len = iov_iter_count(from); struct sk_buff *skb; __u8 pkt_type, opcode; - unsigned long i; int ret; if (len < 2 || len > HCI_MAX_FRAME_SIZE) @@ -176,12 +174,9 @@ static inline ssize_t vhci_get_user(struct vhci_data *data, if (!skb) return -ENOMEM; - for (i = 0; i < count; i++) { - if (copy_from_user(skb_put(skb, iov[i].iov_len), - iov[i].iov_base, iov[i].iov_len)) { - kfree_skb(skb); - return -EFAULT; - } + if (copy_from_iter(skb_put(skb, len), len, from) != len) { + kfree_skb(skb); + return -EFAULT; } pkt_type = *((__u8 *) skb->data); @@ -294,13 +289,12 @@ static ssize_t vhci_read(struct file *file, return ret; } -static ssize_t vhci_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long count, loff_t pos) +static ssize_t vhci_write(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct vhci_data *data = file->private_data; - return vhci_get_user(data, iov, count); + return vhci_get_user(data, from); } static unsigned int vhci_poll(struct file *file, poll_table *wait) @@ -365,7 +359,7 @@ static int vhci_release(struct inode *inode, struct file *file) static const struct file_operations vhci_fops = { .owner = THIS_MODULE, .read = vhci_read, - .aio_write = vhci_write, + .write_iter = vhci_write, .poll = vhci_poll, .open = vhci_open, .release = vhci_release, -- cgit v0.10.2 From cd678fce428018dee0c9345ed63ebf9920d9902f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Aug 2014 12:20:37 -0400 Subject: switch logger to ->write_iter() Signed-off-by: Al Viro diff --git a/drivers/staging/android/logger.c b/drivers/staging/android/logger.c index 0bf0d24..28b93d3 100644 --- a/drivers/staging/android/logger.c +++ b/drivers/staging/android/logger.c @@ -411,69 +411,18 @@ static void fix_up_readers(struct logger_log *log, size_t len) } /* - * do_write_log - writes 'len' bytes from 'buf' to 'log' - * - * The caller needs to hold log->mutex. - */ -static void do_write_log(struct logger_log *log, const void *buf, size_t count) -{ - size_t len; - - len = min(count, log->size - log->w_off); - memcpy(log->buffer + log->w_off, buf, len); - - if (count != len) - memcpy(log->buffer, buf + len, count - len); - - log->w_off = logger_offset(log, log->w_off + count); - -} - -/* - * do_write_log_user - writes 'len' bytes from the user-space buffer 'buf' to - * the log 'log' - * - * The caller needs to hold log->mutex. - * - * Returns 'count' on success, negative error code on failure. - */ -static ssize_t do_write_log_from_user(struct logger_log *log, - const void __user *buf, size_t count) -{ - size_t len; - - len = min(count, log->size - log->w_off); - if (len && copy_from_user(log->buffer + log->w_off, buf, len)) - return -EFAULT; - - if (count != len) - if (copy_from_user(log->buffer, buf + len, count - len)) - /* - * Note that by not updating w_off, this abandons the - * portion of the new entry that *was* successfully - * copied, just above. This is intentional to avoid - * message corruption from missing fragments. - */ - return -EFAULT; - - log->w_off = logger_offset(log, log->w_off + count); - - return count; -} - -/* - * logger_aio_write - our write method, implementing support for write(), + * logger_write_iter - our write method, implementing support for write(), * writev(), and aio_write(). Writes are our fast path, and we try to optimize * them above all else. */ -static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t ppos) +static ssize_t logger_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct logger_log *log = file_get_log(iocb->ki_filp); - size_t orig; struct logger_entry header; struct timespec now; - ssize_t ret = 0; + size_t len, count; + + count = min_t(size_t, iocb->ki_nbytes, LOGGER_ENTRY_MAX_PAYLOAD); now = current_kernel_time(); @@ -482,7 +431,7 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, header.sec = now.tv_sec; header.nsec = now.tv_nsec; header.euid = current_euid(); - header.len = min_t(size_t, iocb->ki_nbytes, LOGGER_ENTRY_MAX_PAYLOAD); + header.len = count; header.hdr_size = sizeof(struct logger_entry); /* null writes succeed, return zero */ @@ -491,8 +440,6 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, mutex_lock(&log->mutex); - orig = log->w_off; - /* * Fix up any readers, pulling them forward to the first readable * entry after (what will be) the new write offset. We do this now @@ -501,33 +448,35 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, */ fix_up_readers(log, sizeof(struct logger_entry) + header.len); - do_write_log(log, &header, sizeof(struct logger_entry)); - - while (nr_segs-- > 0) { - size_t len; - ssize_t nr; + len = min(sizeof(header), log->size - log->w_off); + memcpy(log->buffer + log->w_off, &header, len); + memcpy(log->buffer, (char *)&header + len, sizeof(header) - len); - /* figure out how much of this vector we can keep */ - len = min_t(size_t, iov->iov_len, header.len - ret); + len = min(count, log->size - log->w_off); - /* write out this segment's payload */ - nr = do_write_log_from_user(log, iov->iov_base, len); - if (unlikely(nr < 0)) { - log->w_off = orig; - mutex_unlock(&log->mutex); - return nr; - } + if (copy_from_iter(log->buffer + log->w_off, len, from) != len) { + /* + * Note that by not updating w_off, this abandons the + * portion of the new entry that *was* successfully + * copied, just above. This is intentional to avoid + * message corruption from missing fragments. + */ + mutex_unlock(&log->mutex); + return -EFAULT; + } - iov++; - ret += nr; + if (copy_from_iter(log->buffer, count - len, from) != count - len) { + mutex_unlock(&log->mutex); + return -EFAULT; } + log->w_off = logger_offset(log, log->w_off + count); mutex_unlock(&log->mutex); /* wake up any blocked readers */ wake_up_interruptible(&log->wq); - return ret; + return len; } static struct logger_log *get_log_from_minor(int minor) @@ -736,7 +685,7 @@ static long logger_ioctl(struct file *file, unsigned int cmd, unsigned long arg) static const struct file_operations logger_fops = { .owner = THIS_MODULE, .read = logger_read, - .aio_write = logger_aio_write, + .write_iter = logger_write_iter, .poll = logger_poll, .unlocked_ioctl = logger_ioctl, .compat_ioctl = logger_ioctl, -- cgit v0.10.2 From 849f3127bb46ef75a66dffc1b9b0d3f5f43fa395 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Aug 2014 12:23:53 -0400 Subject: switch /dev/kmsg to ->write_iter() Signed-off-by: Al Viro diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 1ce7706..7a6e694 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -519,14 +519,13 @@ struct devkmsg_user { char buf[8192]; }; -static ssize_t devkmsg_writev(struct kiocb *iocb, const struct iovec *iv, - unsigned long count, loff_t pos) +static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) { char *buf, *line; int i; int level = default_message_loglevel; int facility = 1; /* LOG_USER */ - size_t len = iov_length(iv, count); + size_t len = iocb->ki_nbytes; ssize_t ret = len; if (len > LOG_LINE_MAX) @@ -535,13 +534,10 @@ static ssize_t devkmsg_writev(struct kiocb *iocb, const struct iovec *iv, if (buf == NULL) return -ENOMEM; - line = buf; - for (i = 0; i < count; i++) { - if (copy_from_user(line, iv[i].iov_base, iv[i].iov_len)) { - ret = -EFAULT; - goto out; - } - line += iv[i].iov_len; + buf[len] = '\0'; + if (copy_from_iter(buf, len, from) != len) { + kfree(buf); + return -EFAULT; } /* @@ -567,10 +563,8 @@ static ssize_t devkmsg_writev(struct kiocb *iocb, const struct iovec *iv, line = endp; } } - line[len] = '\0'; printk_emit(facility, level, NULL, 0, "%s", line); -out: kfree(buf); return ret; } @@ -802,7 +796,7 @@ static int devkmsg_release(struct inode *inode, struct file *file) const struct file_operations kmsg_fops = { .open = devkmsg_open, .read = devkmsg_read, - .aio_write = devkmsg_writev, + .write_iter = devkmsg_write, .llseek = devkmsg_llseek, .poll = devkmsg_poll, .release = devkmsg_release, -- cgit v0.10.2 From 8ba7f6c2faada3ad553518b9febbdce7a988359b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 29 Aug 2014 12:37:29 -0400 Subject: saner perf_atoll() That loop in there is both anti-idiomatic *and* completely pointless. strtoll() is there for purpose; use it and compare what's left with acceptable suffices. Signed-off-by: Al Viro diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index 2553e5b..d87767f 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -9,78 +9,48 @@ */ s64 perf_atoll(const char *str) { - unsigned int i; - s64 length = -1, unit = 1; + s64 length; + char *p; + char c; if (!isdigit(str[0])) goto out_err; - for (i = 1; i < strlen(str); i++) { - switch (str[i]) { - case 'B': - case 'b': - break; - case 'K': - if (str[i + 1] != 'B') - goto out_err; - else - goto kilo; - case 'k': - if (str[i + 1] != 'b') - goto out_err; -kilo: - unit = K; - break; - case 'M': - if (str[i + 1] != 'B') - goto out_err; - else - goto mega; - case 'm': - if (str[i + 1] != 'b') + length = strtoll(str, &p, 10); + switch (c = *p++) { + case 'b': case 'B': + if (*p) goto out_err; -mega: - unit = K * K; + case '\0': + return length; + default: + goto out_err; + /* two-letter suffices */ + case 'k': case 'K': + length <<= 10; break; - case 'G': - if (str[i + 1] != 'B') - goto out_err; - else - goto giga; - case 'g': - if (str[i + 1] != 'b') - goto out_err; -giga: - unit = K * K * K; + case 'm': case 'M': + length <<= 20; break; - case 'T': - if (str[i + 1] != 'B') - goto out_err; - else - goto tera; - case 't': - if (str[i + 1] != 'b') - goto out_err; -tera: - unit = K * K * K * K; + case 'g': case 'G': + length <<= 30; break; - case '\0': /* only specified figures */ - unit = 1; + case 't': case 'T': + length <<= 40; break; - default: - if (!isdigit(str[i])) - goto out_err; - break; - } } - - length = atoll(str) * unit; - goto out; + /* we want the cases to match */ + if (islower(c)) { + if (strcmp(p, "b") != 0) + goto out_err; + } else { + if (strcmp(p, "B") != 0) + goto out_err; + } + return length; out_err: - length = -1; -out: - return length; + return -1; } /* -- cgit v0.10.2 From b8850d1fa8e2f6653e57daf6d08e58c5f5eb2c85 Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Thu, 28 Aug 2014 11:26:03 -0600 Subject: fs: namespace: suppress 'may be used uninitialized' warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gcc version 4.9.1 compiler complains Even though it isn't possible for these variables to not get initialized before they are used. fs/namespace.c: In function ‘SyS_mount’: fs/namespace.c:2720:8: warning: ‘kernel_dev’ may be used uninitialized in this function [-Wmaybe-uninitialized] ret = do_mount(kernel_dev, kernel_dir->name, kernel_type, flags, ^ fs/namespace.c:2699:8: note: ‘kernel_dev’ was declared here char *kernel_dev; ^ fs/namespace.c:2720:8: warning: ‘kernel_type’ may be used uninitialized in this function [-Wmaybe-uninitialized] ret = do_mount(kernel_dev, kernel_dir->name, kernel_type, flags, ^ fs/namespace.c:2697:8: note: ‘kernel_type’ was declared here char *kernel_type; ^ Fix the warnings by simplifying copy_mount_string() as suggested by Al Viro. Cc: Alexander Viro Signed-off-by: Tim Gardner Signed-off-by: Al Viro diff --git a/fs/compat.c b/fs/compat.c index 66d3d3c..6205c24 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -797,8 +797,9 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name, struct filename *dir; int retval; - retval = copy_mount_string(type, &kernel_type); - if (retval < 0) + kernel_type = copy_mount_string(type); + retval = PTR_ERR(kernel_type); + if (IS_ERR(kernel_type)) goto out; dir = getname(dir_name); @@ -806,8 +807,9 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name, if (IS_ERR(dir)) goto out1; - retval = copy_mount_string(dev_name, &kernel_dev); - if (retval < 0) + kernel_dev = copy_mount_string(dev_name); + retval = PTR_ERR(kernel_dev); + if (IS_ERR(kernel_dev)) goto out2; retval = copy_mount_options(data, &data_page); diff --git a/fs/internal.h b/fs/internal.h index e325b4f..bd4ac19 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -51,7 +51,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *, * namespace.c */ extern int copy_mount_options(const void __user *, unsigned long *); -extern int copy_mount_string(const void __user *, char **); +extern char *copy_mount_string(const void __user *); extern struct vfsmount *lookup_mnt(struct path *); extern int finish_automount(struct vfsmount *, struct path *); diff --git a/fs/namespace.c b/fs/namespace.c index 00e5b1e..abd3abb 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2514,21 +2514,9 @@ int copy_mount_options(const void __user * data, unsigned long *where) return 0; } -int copy_mount_string(const void __user *data, char **where) +char *copy_mount_string(const void __user *data) { - char *tmp; - - if (!data) { - *where = NULL; - return 0; - } - - tmp = strndup_user(data, PAGE_SIZE); - if (IS_ERR(tmp)) - return PTR_ERR(tmp); - - *where = tmp; - return 0; + return data ? strndup_user(data, PAGE_SIZE) : NULL; } /* @@ -2794,8 +2782,9 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, char *kernel_dev; unsigned long data_page; - ret = copy_mount_string(type, &kernel_type); - if (ret < 0) + kernel_type = copy_mount_string(type); + ret = PTR_ERR(kernel_type); + if (IS_ERR(kernel_type)) goto out_type; kernel_dir = getname(dir_name); @@ -2804,8 +2793,9 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, goto out_dir; } - ret = copy_mount_string(dev_name, &kernel_dev); - if (ret < 0) + kernel_dev = copy_mount_string(dev_name); + ret = PTR_ERR(kernel_dev); + if (IS_ERR(kernel_dev)) goto out_dev; ret = copy_mount_options(data, &data_page); -- cgit v0.10.2 From e983094d6dce524f3890edfec44b7ca6dbfa1183 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 31 Aug 2014 14:12:09 -0400 Subject: missing annotation in fs/file.c Signed-off-by: Al Viro diff --git a/fs/file.c b/fs/file.c index 66923fe..f3b2c20 100644 --- a/fs/file.c +++ b/fs/file.c @@ -750,6 +750,7 @@ bool get_close_on_exec(unsigned int fd) static int do_dup2(struct files_struct *files, struct file *file, unsigned fd, unsigned flags) +__releases(&files->file_lock) { struct file *tofree; struct fdtable *fdt; -- cgit v0.10.2 From 3cfb2face6205d30ecfc0145d68cd9e0c3dfe6f1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 31 Aug 2014 15:06:09 -0400 Subject: nouveau: __iomem misannotations Signed-off-by: Al Viro diff --git a/drivers/gpu/drm/nouveau/nouveau_nvif.c b/drivers/gpu/drm/nouveau/nouveau_nvif.c index 47ca886..6544b84 100644 --- a/drivers/gpu/drm/nouveau/nouveau_nvif.c +++ b/drivers/gpu/drm/nouveau/nouveau_nvif.c @@ -40,12 +40,12 @@ #include "nouveau_usif.h" static void -nvkm_client_unmap(void *priv, void *ptr, u32 size) +nvkm_client_unmap(void *priv, void __iomem *ptr, u32 size) { iounmap(ptr); } -static void * +static void __iomem * nvkm_client_map(void *priv, u64 handle, u32 size) { return ioremap(handle, size); diff --git a/drivers/gpu/drm/nouveau/nvif/driver.h b/drivers/gpu/drm/nouveau/nvif/driver.h index b72a8f0..ac4bdb3 100644 --- a/drivers/gpu/drm/nouveau/nvif/driver.h +++ b/drivers/gpu/drm/nouveau/nvif/driver.h @@ -9,8 +9,8 @@ struct nvif_driver { int (*suspend)(void *priv); int (*resume)(void *priv); int (*ioctl)(void *priv, bool super, void *data, u32 size, void **hack); - void *(*map)(void *priv, u64 handle, u32 size); - void (*unmap)(void *priv, void *ptr, u32 size); + void __iomem *(*map)(void *priv, u64 handle, u32 size); + void (*unmap)(void *priv, void __iomem *ptr, u32 size); bool keep; }; diff --git a/drivers/gpu/drm/nouveau/nvif/object.h b/drivers/gpu/drm/nouveau/nvif/object.h index fac3a3b..fe51917 100644 --- a/drivers/gpu/drm/nouveau/nvif/object.h +++ b/drivers/gpu/drm/nouveau/nvif/object.h @@ -14,7 +14,7 @@ struct nvif_object { void *priv; /*XXX: hack */ void (*dtor)(struct nvif_object *); struct { - void *ptr; + void __iomem *ptr; u32 size; } map; }; @@ -42,7 +42,7 @@ void nvif_object_unmap(struct nvif_object *); struct nvif_object *_object = nvif_object(a); \ u32 _data; \ if (likely(_object->map.ptr)) \ - _data = ioread##b##_native((u8 *)_object->map.ptr + (c)); \ + _data = ioread##b##_native((u8 __iomem *)_object->map.ptr + (c)); \ else \ _data = nvif_object_rd(_object, (b) / 8, (c)); \ _data; \ @@ -50,7 +50,7 @@ void nvif_object_unmap(struct nvif_object *); #define nvif_wr(a,b,c,d) ({ \ struct nvif_object *_object = nvif_object(a); \ if (likely(_object->map.ptr)) \ - iowrite##b##_native((d), (u8 *)_object->map.ptr + (c)); \ + iowrite##b##_native((d), (u8 __iomem *)_object->map.ptr + (c)); \ else \ nvif_object_wr(_object, (b) / 8, (c), (d)); \ }) -- cgit v0.10.2 From 765d368217715c794f767d396d8cea16502d823f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 2 Sep 2014 17:29:40 -0400 Subject: android: ->f_op is never NULL Signed-off-by: Al Viro diff --git a/drivers/staging/android/ion/compat_ion.c b/drivers/staging/android/ion/compat_ion.c index ee3a738..a402fda 100644 --- a/drivers/staging/android/ion/compat_ion.c +++ b/drivers/staging/android/ion/compat_ion.c @@ -125,7 +125,7 @@ long compat_ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { long ret; - if (!filp->f_op || !filp->f_op->unlocked_ioctl) + if (!filp->f_op->unlocked_ioctl) return -ENOTTY; switch (cmd) { -- cgit v0.10.2 From c2e3f5d5f4620bb6568bc559f712ce80222e20cb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 2 Sep 2014 17:31:28 -0400 Subject: ecryptfs: ->f_op is never NULL Signed-off-by: Al Viro diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index db0fad3..4ffa35e 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -327,7 +327,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct file *lower_file = ecryptfs_file_to_lower(file); long rc = -ENOIOCTLCMD; - if (lower_file->f_op && lower_file->f_op->compat_ioctl) + if (lower_file->f_op->compat_ioctl) rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); return rc; } -- cgit v0.10.2 From 6b933de642d2726245df98d076cff09bf4d34dde Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 2 Sep 2014 19:42:14 -0400 Subject: [s390] remove pointless assignment of ->f_op in vmlogrdr ->open() The only way we can get to that function is from misc_open(), after the latter has set file->f_op to exactly the same value we are (re)assigning there. Signed-off-by: Al Viro diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index a8848db..9bb48d7 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -338,7 +338,6 @@ static int vmlogrdr_open (struct inode *inode, struct file *filp) /* set the file options */ filp->private_data = logptr; - filp->f_op = &vmlogrdr_fops; /* start recording for this service*/ if (logptr->autorecording) { -- cgit v0.10.2 From 9bb8730ed3d2658c6dd49b7f811231e2a0a2e3ed Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 2 Sep 2014 23:53:04 -0400 Subject: jfs: don't hash direct inode hlist_add_fake(inode->i_hash), same as for the rest of special ones... Signed-off-by: Al Viro diff --git a/fs/jfs/super.c b/fs/jfs/super.c index adf8cb0..93e897e 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -550,7 +550,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) inode->i_ino = 0; inode->i_size = sb->s_bdev->bd_inode->i_size; inode->i_mapping->a_ops = &jfs_metapage_aops; - insert_inode_hash(inode); + hlist_add_fake(&inode->i_hash); mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); sbi->direct_inode = inode; -- cgit v0.10.2 From 1bb27cacf4992b77556ed4487f99c76c4af3b43d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 3 Sep 2014 13:32:19 -0400 Subject: f_fs: saner API for ffs_sb_create_file() make it return dentry instead of inode Acked-by: Felipe Balbi Signed-off-by: Al Viro diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 0dc3552..4726e27 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -164,10 +164,9 @@ struct ffs_desc_helper { static int __must_check ffs_epfiles_create(struct ffs_data *ffs); static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count); -static struct inode *__must_check +static struct dentry * ffs_sb_create_file(struct super_block *sb, const char *name, void *data, - const struct file_operations *fops, - struct dentry **dentry_p); + const struct file_operations *fops); /* Devices management *******************************************************/ @@ -1096,10 +1095,9 @@ ffs_sb_make_inode(struct super_block *sb, void *data, } /* Create "regular" file */ -static struct inode *ffs_sb_create_file(struct super_block *sb, +static struct dentry *ffs_sb_create_file(struct super_block *sb, const char *name, void *data, - const struct file_operations *fops, - struct dentry **dentry_p) + const struct file_operations *fops) { struct ffs_data *ffs = sb->s_fs_info; struct dentry *dentry; @@ -1118,10 +1116,7 @@ static struct inode *ffs_sb_create_file(struct super_block *sb, } d_add(dentry, inode); - if (dentry_p) - *dentry_p = dentry; - - return inode; + return dentry; } /* Super block */ @@ -1166,7 +1161,7 @@ static int ffs_sb_fill(struct super_block *sb, void *_data, int silent) /* EP0 file */ if (unlikely(!ffs_sb_create_file(sb, "ep0", ffs, - &ffs_ep0_operations, NULL))) + &ffs_ep0_operations))) return -ENOMEM; return 0; @@ -1535,9 +1530,10 @@ static int ffs_epfiles_create(struct ffs_data *ffs) mutex_init(&epfile->mutex); init_waitqueue_head(&epfile->wait); sprintf(epfiles->name, "ep%u", i); - if (!unlikely(ffs_sb_create_file(ffs->sb, epfiles->name, epfile, - &ffs_epfile_operations, - &epfile->dentry))) { + epfile->dentry = ffs_sb_create_file(ffs->sb, epfiles->name, + epfile, + &ffs_epfile_operations); + if (unlikely(!epfile->dentry)) { ffs_epfiles_destroy(epfiles, i - 1); return -ENOMEM; } -- cgit v0.10.2 From fb6c3225b49d389ff4a9a454fc8c3f3642632744 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 3 Sep 2014 13:37:56 -0400 Subject: gadgetfs: saner API for gadgetfs_create_file() return dentry, not inode. dev->inode is never used by anything, don't bother with storing it. Acked-by: Felipe Balbi Signed-off-by: Al Viro diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index e96077b..69202cd 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -198,7 +198,6 @@ struct ep_data { struct list_head epfiles; wait_queue_head_t wait; struct dentry *dentry; - struct inode *inode; }; static inline void get_ep (struct ep_data *data) @@ -1618,10 +1617,9 @@ static void destroy_ep_files (struct dev_data *dev) } -static struct inode * +static struct dentry * gadgetfs_create_file (struct super_block *sb, char const *name, - void *data, const struct file_operations *fops, - struct dentry **dentry_p); + void *data, const struct file_operations *fops); static int activate_ep_files (struct dev_data *dev) { @@ -1649,10 +1647,9 @@ static int activate_ep_files (struct dev_data *dev) if (!data->req) goto enomem1; - data->inode = gadgetfs_create_file (dev->sb, data->name, - data, &ep_config_operations, - &data->dentry); - if (!data->inode) + data->dentry = gadgetfs_create_file (dev->sb, data->name, + data, &ep_config_operations); + if (!data->dentry) goto enomem2; list_add_tail (&data->epfiles, &dev->epfiles); } @@ -2011,10 +2008,9 @@ gadgetfs_make_inode (struct super_block *sb, /* creates in fs root directory, so non-renamable and non-linkable. * so inode and dentry are paired, until device reconfig. */ -static struct inode * +static struct dentry * gadgetfs_create_file (struct super_block *sb, char const *name, - void *data, const struct file_operations *fops, - struct dentry **dentry_p) + void *data, const struct file_operations *fops) { struct dentry *dentry; struct inode *inode; @@ -2030,8 +2026,7 @@ gadgetfs_create_file (struct super_block *sb, char const *name, return NULL; } d_add (dentry, inode); - *dentry_p = dentry; - return inode; + return dentry; } static const struct super_operations gadget_fs_operations = { @@ -2079,9 +2074,8 @@ gadgetfs_fill_super (struct super_block *sb, void *opts, int silent) goto Enomem; dev->sb = sb; - if (!gadgetfs_create_file (sb, CHIP, - dev, &dev_init_operations, - &dev->dentry)) { + dev->dentry = gadgetfs_create_file(sb, CHIP, dev, &dev_init_operations); + if (!dev->dentry) { put_dev(dev); goto Enomem; } -- cgit v0.10.2 From 18c85d09dcd14342cd4891d016f4086095160573 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 4 Sep 2014 19:29:32 -0400 Subject: [infiniband] remove pointless assignments Signed-off-by: Al Viro diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index e0c404b..4977082 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -82,7 +82,6 @@ static int create_file(const char *name, umode_t mode, { int error; - *dentry = NULL; mutex_lock(&parent->d_inode->i_mutex); *dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(*dentry)) diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index cab610c..8185458 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -89,7 +89,6 @@ static int create_file(const char *name, umode_t mode, { int error; - *dentry = NULL; mutex_lock(&parent->d_inode->i_mutex); *dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(*dentry)) -- cgit v0.10.2 From 4d93bc3e81736ce55c79d9cae743bab4f89b4f9c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 12 Sep 2014 18:21:05 -0400 Subject: gfs2_atomic_open(): skip lookups on hashed dentry hashed dentry can be passed to ->atomic_open() only if a) it has just passed revalidation and b) it's negative Signed-off-by: Al Viro diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index fc8ac2e..8108b4f 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1244,6 +1244,9 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, struct dentry *d; bool excl = !!(flags & O_EXCL); + if (!d_unhashed(dentry)) + goto skip_lookup; + d = __gfs2_lookup(dir, dentry, file, opened); if (IS_ERR(d)) return PTR_ERR(d); @@ -1260,6 +1263,8 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, } BUG_ON(d != NULL); + +skip_lookup: if (!(flags & O_CREAT)) return -ENOENT; -- cgit v0.10.2 From 5e6123f3477e4260fb14392f0a88f1a842fa4d42 Mon Sep 17 00:00:00 2001 From: Seunghun Lee Date: Sun, 14 Sep 2014 22:15:10 +0900 Subject: vfs: move getname() from callers to do_mount() It would make more sense to pass char __user * instead of char * in callers of do_mount() and do getname() inside do_mount(). Suggested-by: Al Viro Signed-off-by: Seunghun Lee Signed-off-by: Al Viro diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 1402fcc..f9c732e 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -446,7 +446,8 @@ struct procfs_args { * unhappy with OSF UFS. [CHECKME] */ static int -osf_ufs_mount(const char *dirname, struct ufs_args __user *args, int flags) +osf_ufs_mount(const char __user *dirname, + struct ufs_args __user *args, int flags) { int retval; struct cdfs_args tmp; @@ -466,7 +467,8 @@ osf_ufs_mount(const char *dirname, struct ufs_args __user *args, int flags) } static int -osf_cdfs_mount(const char *dirname, struct cdfs_args __user *args, int flags) +osf_cdfs_mount(const char __user *dirname, + struct cdfs_args __user *args, int flags) { int retval; struct cdfs_args tmp; @@ -486,7 +488,8 @@ osf_cdfs_mount(const char *dirname, struct cdfs_args __user *args, int flags) } static int -osf_procfs_mount(const char *dirname, struct procfs_args __user *args, int flags) +osf_procfs_mount(const char __user *dirname, + struct procfs_args __user *args, int flags) { struct procfs_args tmp; @@ -500,28 +503,22 @@ SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, const char __user *, path, int, flag, void __user *, data) { int retval; - struct filename *name; - name = getname(path); - retval = PTR_ERR(name); - if (IS_ERR(name)) - goto out; switch (typenr) { case 1: - retval = osf_ufs_mount(name->name, data, flag); + retval = osf_ufs_mount(path, data, flag); break; case 6: - retval = osf_cdfs_mount(name->name, data, flag); + retval = osf_cdfs_mount(path, data, flag); break; case 9: - retval = osf_procfs_mount(name->name, data, flag); + retval = osf_procfs_mount(path, data, flag); break; default: retval = -EINVAL; printk("osf_mount(%ld, %x)\n", typenr, flag); } - putname(name); - out: + return retval; } diff --git a/fs/compat.c b/fs/compat.c index 6205c24..b13df99 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -794,7 +794,6 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name, char *kernel_type; unsigned long data_page; char *kernel_dev; - struct filename *dir; int retval; kernel_type = copy_mount_string(type); @@ -802,19 +801,14 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name, if (IS_ERR(kernel_type)) goto out; - dir = getname(dir_name); - retval = PTR_ERR(dir); - if (IS_ERR(dir)) - goto out1; - kernel_dev = copy_mount_string(dev_name); retval = PTR_ERR(kernel_dev); if (IS_ERR(kernel_dev)) - goto out2; + goto out1; retval = copy_mount_options(data, &data_page); if (retval < 0) - goto out3; + goto out2; retval = -EINVAL; @@ -823,19 +817,17 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name, do_ncp_super_data_conv((void *)data_page); } else if (!strcmp(kernel_type, NFS4_NAME)) { if (do_nfs4_super_data_conv((void *) data_page)) - goto out4; + goto out3; } } - retval = do_mount(kernel_dev, dir->name, kernel_type, + retval = do_mount(kernel_dev, dir_name, kernel_type, flags, (void*)data_page); - out4: - free_page(data_page); out3: - kfree(kernel_dev); + free_page(data_page); out2: - putname(dir); + kfree(kernel_dev); out1: kfree(kernel_type); out: diff --git a/fs/namespace.c b/fs/namespace.c index abd3abb..348562f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2533,7 +2533,7 @@ char *copy_mount_string(const void __user *data) * Therefore, if this magic number is present, it carries no information * and must be discarded. */ -long do_mount(const char *dev_name, const char *dir_name, +long do_mount(const char *dev_name, const char __user *dir_name, const char *type_page, unsigned long flags, void *data_page) { struct path path; @@ -2545,15 +2545,11 @@ long do_mount(const char *dev_name, const char *dir_name, flags &= ~MS_MGC_MSK; /* Basic sanity checks */ - - if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE)) - return -EINVAL; - if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; /* ... and get the mountpoint */ - retval = kern_path(dir_name, LOOKUP_FOLLOW, &path); + retval = user_path(dir_name, &path); if (retval) return retval; @@ -2778,7 +2774,6 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, { int ret; char *kernel_type; - struct filename *kernel_dir; char *kernel_dev; unsigned long data_page; @@ -2787,12 +2782,6 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, if (IS_ERR(kernel_type)) goto out_type; - kernel_dir = getname(dir_name); - if (IS_ERR(kernel_dir)) { - ret = PTR_ERR(kernel_dir); - goto out_dir; - } - kernel_dev = copy_mount_string(dev_name); ret = PTR_ERR(kernel_dev); if (IS_ERR(kernel_dev)) @@ -2802,15 +2791,13 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, if (ret < 0) goto out_data; - ret = do_mount(kernel_dev, kernel_dir->name, kernel_type, flags, + ret = do_mount(kernel_dev, dir_name, kernel_type, flags, (void *) data_page); free_page(data_page); out_data: kfree(kernel_dev); out_dev: - putname(kernel_dir); -out_dir: kfree(kernel_type); out_type: return ret; diff --git a/include/linux/fs.h b/include/linux/fs.h index 75bdd51..9214836 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1855,7 +1855,8 @@ extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); extern void kern_unmount(struct vfsmount *mnt); extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); -extern long do_mount(const char *, const char *, const char *, unsigned long, void *); +extern long do_mount(const char *, const char __user *, + const char *, unsigned long, void *); extern struct vfsmount *collect_mounts(struct path *); extern void drop_collected_mounts(struct vfsmount *); extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, -- cgit v0.10.2 From 821cc3070ff54e39ab6624c843f1905d737d9ac0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 25 Sep 2014 23:57:58 -0400 Subject: ncpfs: use list_for_each_entry() for d_subdirs walk Signed-off-by: Al Viro diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 08b8ea8..314e7ad 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -388,7 +388,6 @@ static struct dentry * ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) { struct dentry *dent = dentry; - struct list_head *next; if (d_validate(dent, parent)) { if (dent->d_name.len <= NCP_MAXPATHLEN && @@ -404,9 +403,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) /* If a pointer is invalid, we search the dentry. */ spin_lock(&parent->d_lock); - next = parent->d_subdirs.next; - while (next != &parent->d_subdirs) { - dent = list_entry(next, struct dentry, d_u.d_child); + list_for_each_entry(dent, &parent->d_subdirs, d_u.d_child) { if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) dget(dent); @@ -415,7 +412,6 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) spin_unlock(&parent->d_lock); goto out; } - next = next->next; } spin_unlock(&parent->d_lock); return NULL; diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 32c0658..52cb19d 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -188,20 +188,14 @@ static inline void ncp_renew_dentries(struct dentry *parent) { struct ncp_server *server = NCP_SERVER(parent->d_inode); - struct list_head *next; struct dentry *dentry; spin_lock(&parent->d_lock); - next = parent->d_subdirs.next; - while (next != &parent->d_subdirs) { - dentry = list_entry(next, struct dentry, d_u.d_child); - + list_for_each_entry(dentry, &parent->d_subdirs, d_u.d_child) { if (dentry->d_fsdata == NULL) ncp_age_dentry(server, dentry); else ncp_new_dentry(dentry); - - next = next->next; } spin_unlock(&parent->d_lock); } @@ -210,16 +204,12 @@ static inline void ncp_invalidate_dircache_entries(struct dentry *parent) { struct ncp_server *server = NCP_SERVER(parent->d_inode); - struct list_head *next; struct dentry *dentry; spin_lock(&parent->d_lock); - next = parent->d_subdirs.next; - while (next != &parent->d_subdirs) { - dentry = list_entry(next, struct dentry, d_u.d_child); + list_for_each_entry(dentry, &parent->d_subdirs, d_u.d_child) { dentry->d_fsdata = NULL; ncp_age_dentry(server, dentry); - next = next->next; } spin_unlock(&parent->d_lock); } -- cgit v0.10.2 From 24dff96a37a2ca319e75a74d3929b2de22447ca6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 8 Oct 2014 23:44:00 -0400 Subject: fix misuses of f_count() in ppp and netlink we used to check for "nobody else could start doing anything with that opened file" by checking that refcount was 2 or less - one for descriptor table and one we'd acquired in fget() on the way to wherever we are. That was race-prone (somebody else might have had a reference to descriptor table and do fget() just as we'd been checking) and it had become flat-out incorrect back when we switched to fget_light() on those codepaths - unlike fget(), it doesn't grab an extra reference unless the descriptor table is shared. The same change allowed a race-free check, though - we are safe exactly when refcount is less than 2. It was a long time ago; pre-2.6.12 for ioctl() (the codepath leading to ppp one) and 2.6.17 for sendmsg() (netlink one). OTOH, netlink hadn't grown that check until 3.9 and ppp used to live in drivers/net, not drivers/net/ppp until 3.1. The bug existed well before that, though, and the same fix used to apply in old location of file. Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index fa0d717..90c639b 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -594,7 +594,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (file == ppp->owner) ppp_shutdown_interface(ppp); } - if (atomic_long_read(&file->f_count) <= 2) { + if (atomic_long_read(&file->f_count) < 2) { ppp_release(NULL, file); err = 0; } else diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c416725..7a186e7 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -715,7 +715,7 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, * after validation, the socket and the ring may only be used by a * single process, otherwise we fall back to copying. */ - if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 || + if (atomic_long_read(&sk->sk_socket->file->f_count) > 1 || atomic_read(&nlk->mapped) > 1) excl = false; -- cgit v0.10.2 From 115cbfdc609702a131c51281864c08f5d27b459a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 11 Oct 2014 23:05:52 -0400 Subject: let path_init() failures treated the same way as subsequent link_path_walk() As it is, path_lookupat() and path_mounpoint() might end up leaking struct file reference in some cases. Spotted-by: Eric Biggers Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index d20d579..0f64aa4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1950,7 +1950,7 @@ static int path_lookupat(int dfd, const char *name, err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base); if (unlikely(err)) - return err; + goto out; current->total_link_count = 0; err = link_path_walk(name, nd); @@ -1982,6 +1982,7 @@ static int path_lookupat(int dfd, const char *name, } } +out: if (base) fput(base); @@ -2301,7 +2302,7 @@ path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags err = path_init(dfd, name, flags | LOOKUP_PARENT, &nd, &base); if (unlikely(err)) - return err; + goto out; current->total_link_count = 0; err = link_path_walk(name, &nd); -- cgit v0.10.2 From 810bb172671aec17cf85cc748120cf73c17af372 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Oct 2014 12:45:37 -0400 Subject: take dname_external() into fs/dcache.c never used outside and it's too low-level for legitimate uses outside of fs/dcache.c anyway Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index 8221faae..d5a23fd 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -264,6 +264,11 @@ static void __d_free_external(struct rcu_head *head) kmem_cache_free(dentry_cache, dentry); } +static inline int dname_external(const struct dentry *dentry) +{ + return dentry->d_name.name != dentry->d_iname; +} + static void dentry_free(struct dentry *dentry) { if (unlikely(dname_external(dentry))) { diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 81b0315..27a7f00 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -226,11 +226,6 @@ struct dentry_operations { extern seqlock_t rename_lock; -static inline int dname_external(const struct dentry *dentry) -{ - return dentry->d_name.name != dentry->d_iname; -} - /* * These are the low-level FS interfaces to the dcache.. */ -- cgit v0.10.2 From 7b600f2abb36909e70963cc7c744c15983500bee Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Oct 2014 13:31:58 -0400 Subject: don't need that forward declaration of struct nameidata in dcache.h anymore Signed-off-by: Al Viro diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 27a7f00..b2a2a08 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -11,7 +11,6 @@ #include #include -struct nameidata; struct path; struct vfsmount; -- cgit v0.10.2 From 50b220bbe7092bbfe4406adfe3a216337a64655d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Oct 2014 14:21:38 -0400 Subject: reiserfs: remove pointless forward declaration of struct nameidata Signed-off-by: Al Viro diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h index 857ec7e..f620e96 100644 --- a/fs/reiserfs/xattr.h +++ b/fs/reiserfs/xattr.h @@ -7,7 +7,6 @@ struct inode; struct dentry; struct iattr; struct super_block; -struct nameidata; int reiserfs_xattr_register_handlers(void) __init; void reiserfs_xattr_unregister_handlers(void); -- cgit v0.10.2 From 8cc431165d8fbda43634dd15ab17f76a151c39a8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 12 Oct 2014 11:59:58 -0500 Subject: vfs: Deduplicate code shared by xattr system calls operating on paths The following pairs of system calls dealing with extended attributes only differ in their behavior on whether the symbolic link is followed (when the named file is a symbolic link): - setxattr() and lsetxattr() - getxattr() and lgetxattr() - listxattr() and llistxattr() - removexattr() and lremovexattr() Despite this, the implementations all had duplicated code, so this commit redirects each of the above pairs of system calls to a corresponding function to which different lookup flags (LOOKUP_FOLLOW or 0) are passed. For me this reduced the stripped size of xattr.o from 8824 to 8248 bytes. Signed-off-by: Eric Biggers Signed-off-by: Al Viro diff --git a/fs/xattr.c b/fs/xattr.c index c69e6d4..64e83ef 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -364,13 +364,12 @@ out: return error; } -SYSCALL_DEFINE5(setxattr, const char __user *, pathname, - const char __user *, name, const void __user *, value, - size_t, size, int, flags) +static int path_setxattr(const char __user *pathname, + const char __user *name, const void __user *value, + size_t size, int flags, unsigned int lookup_flags) { struct path path; int error; - unsigned int lookup_flags = LOOKUP_FOLLOW; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) @@ -388,28 +387,18 @@ retry: return error; } +SYSCALL_DEFINE5(setxattr, const char __user *, pathname, + const char __user *, name, const void __user *, value, + size_t, size, int, flags) +{ + return path_setxattr(pathname, name, value, size, flags, LOOKUP_FOLLOW); +} + SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname, const char __user *, name, const void __user *, value, size_t, size, int, flags) { - struct path path; - int error; - unsigned int lookup_flags = 0; -retry: - error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); - if (error) - return error; - error = mnt_want_write(path.mnt); - if (!error) { - error = setxattr(path.dentry, name, value, size, flags); - mnt_drop_write(path.mnt); - } - path_put(&path); - if (retry_estale(error, lookup_flags)) { - lookup_flags |= LOOKUP_REVAL; - goto retry; - } - return error; + return path_setxattr(pathname, name, value, size, flags, 0); } SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, @@ -481,12 +470,12 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, return error; } -SYSCALL_DEFINE4(getxattr, const char __user *, pathname, - const char __user *, name, void __user *, value, size_t, size) +static ssize_t path_getxattr(const char __user *pathname, + const char __user *name, void __user *value, + size_t size, unsigned int lookup_flags) { struct path path; ssize_t error; - unsigned int lookup_flags = LOOKUP_FOLLOW; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) @@ -500,23 +489,16 @@ retry: return error; } +SYSCALL_DEFINE4(getxattr, const char __user *, pathname, + const char __user *, name, void __user *, value, size_t, size) +{ + return path_getxattr(pathname, name, value, size, LOOKUP_FOLLOW); +} + SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname, const char __user *, name, void __user *, value, size_t, size) { - struct path path; - ssize_t error; - unsigned int lookup_flags = 0; -retry: - error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); - if (error) - return error; - error = getxattr(path.dentry, name, value, size); - path_put(&path); - if (retry_estale(error, lookup_flags)) { - lookup_flags |= LOOKUP_REVAL; - goto retry; - } - return error; + return path_getxattr(pathname, name, value, size, 0); } SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, @@ -571,12 +553,11 @@ listxattr(struct dentry *d, char __user *list, size_t size) return error; } -SYSCALL_DEFINE3(listxattr, const char __user *, pathname, char __user *, list, - size_t, size) +static ssize_t path_listxattr(const char __user *pathname, char __user *list, + size_t size, unsigned int lookup_flags) { struct path path; ssize_t error; - unsigned int lookup_flags = LOOKUP_FOLLOW; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) @@ -590,23 +571,16 @@ retry: return error; } +SYSCALL_DEFINE3(listxattr, const char __user *, pathname, char __user *, list, + size_t, size) +{ + return path_listxattr(pathname, list, size, LOOKUP_FOLLOW); +} + SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list, size_t, size) { - struct path path; - ssize_t error; - unsigned int lookup_flags = 0; -retry: - error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); - if (error) - return error; - error = listxattr(path.dentry, list, size); - path_put(&path); - if (retry_estale(error, lookup_flags)) { - lookup_flags |= LOOKUP_REVAL; - goto retry; - } - return error; + return path_listxattr(pathname, list, size, 0); } SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) @@ -640,12 +614,11 @@ removexattr(struct dentry *d, const char __user *name) return vfs_removexattr(d, kname); } -SYSCALL_DEFINE2(removexattr, const char __user *, pathname, - const char __user *, name) +static int path_removexattr(const char __user *pathname, + const char __user *name, unsigned int lookup_flags) { struct path path; int error; - unsigned int lookup_flags = LOOKUP_FOLLOW; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) @@ -663,27 +636,16 @@ retry: return error; } +SYSCALL_DEFINE2(removexattr, const char __user *, pathname, + const char __user *, name) +{ + return path_removexattr(pathname, name, LOOKUP_FOLLOW); +} + SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname, const char __user *, name) { - struct path path; - int error; - unsigned int lookup_flags = 0; -retry: - error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); - if (error) - return error; - error = mnt_want_write(path.mnt); - if (!error) { - error = removexattr(path.dentry, name); - mnt_drop_write(path.mnt); - } - path_put(&path); - if (retry_estale(error, lookup_flags)) { - lookup_flags |= LOOKUP_REVAL; - goto retry; - } - return error; + return path_removexattr(pathname, name, 0); } SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) -- cgit v0.10.2 From a457606a6f81cfddfc9da1ef2a8bf2c65a8eb35e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 12 Oct 2014 14:29:29 -0500 Subject: fs/file_table.c: Update alloc_file() comment This comment is 5 years outdated; init_file() no longer exists. Signed-off-by: Eric Biggers Signed-off-by: Al Viro diff --git a/fs/file_table.c b/fs/file_table.c index 385bfd3..a843623 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -150,18 +150,10 @@ over: /** * alloc_file - allocate and initialize a 'struct file' - * @mnt: the vfsmount on which the file will reside - * @dentry: the dentry representing the new file + * + * @path: the (dentry, vfsmount) pair for the new file * @mode: the mode with which the new file will be opened * @fop: the 'struct file_operations' for the new file - * - * Use this instead of get_empty_filp() to get a new - * 'struct file'. Do so because of the same initialization - * pitfalls reasons listed for init_file(). This is a - * preferred interface to using init_file(). - * - * If all the callers of init_file() are eliminated, its - * code should be moved into this function. */ struct file *alloc_file(struct path *path, fmode_t mode, const struct file_operations *fop) -- cgit v0.10.2