From 19660af736ba00e1620970601dd313efedbbcfd2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2011 10:32:48 -0400 Subject: consolidate nameidata_..._drop_rcu() Merge these into a single function (unlazy_walk(nd, dentry)), kill ..._maybe variants Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 54fc993..1039cba 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -391,79 +391,28 @@ void path_put(struct path *path) } EXPORT_SYMBOL(path_put); -/** - * nameidata_drop_rcu - drop this nameidata out of rcu-walk - * @nd: nameidata pathwalk data to drop - * Returns: 0 on success, -ECHILD on failure - * +/* * Path walking has 2 modes, rcu-walk and ref-walk (see - * Documentation/filesystems/path-lookup.txt). __drop_rcu* functions attempt - * to drop out of rcu-walk mode and take normal reference counts on dentries - * and vfsmounts to transition to rcu-walk mode. __drop_rcu* functions take - * refcounts at the last known good point before rcu-walk got stuck, so - * ref-walk may continue from there. If this is not successful (eg. a seqcount - * has changed), then failure is returned and path walk restarts from the - * beginning in ref-walk mode. - * - * nameidata_drop_rcu attempts to drop the current nd->path and nd->root into - * ref-walk. Must be called from rcu-walk context. + * Documentation/filesystems/path-lookup.txt). In situations when we can't + * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab + * normal reference counts on dentries and vfsmounts to transition to ref-walk + * mode. Refcounts are grabbed at the last known good point before rcu-walk + * got stuck, so ref-walk may continue from there. If this is not successful + * (eg. a seqcount has changed), then failure is returned and it's up to caller + * to restart the path walk from the beginning in ref-walk mode. 
*/ -static int nameidata_drop_rcu(struct nameidata *nd) -{ - struct fs_struct *fs = current->fs; - struct dentry *dentry = nd->path.dentry; - int want_root = 0; - - BUG_ON(!(nd->flags & LOOKUP_RCU)); - if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { - want_root = 1; - spin_lock(&fs->lock); - if (nd->root.mnt != fs->root.mnt || - nd->root.dentry != fs->root.dentry) - goto err_root; - } - spin_lock(&dentry->d_lock); - if (!__d_rcu_to_refcount(dentry, nd->seq)) - goto err; - BUG_ON(nd->inode != dentry->d_inode); - spin_unlock(&dentry->d_lock); - if (want_root) { - path_get(&nd->root); - spin_unlock(&fs->lock); - } - mntget(nd->path.mnt); - - rcu_read_unlock(); - br_read_unlock(vfsmount_lock); - nd->flags &= ~LOOKUP_RCU; - return 0; -err: - spin_unlock(&dentry->d_lock); -err_root: - if (want_root) - spin_unlock(&fs->lock); - return -ECHILD; -} - -/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ -static inline int nameidata_drop_rcu_maybe(struct nameidata *nd) -{ - if (nd->flags & LOOKUP_RCU) - return nameidata_drop_rcu(nd); - return 0; -} /** - * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk - * @nd: nameidata pathwalk data to drop - * @dentry: dentry to drop + * unlazy_walk - try to switch to ref-walk mode. + * @nd: nameidata pathwalk data + * @dentry: child of nd->path.dentry or NULL * Returns: 0 on success, -ECHILD on failure * - * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root, - * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on - * @nd. Must be called from rcu-walk context. + * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry + * for ref-walk mode. @dentry must be a path found by a do_lookup call on + * @nd or NULL. Must be called from rcu-walk context. 
*/ -static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry) +static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) { struct fs_struct *fs = current->fs; struct dentry *parent = nd->path.dentry; @@ -478,18 +427,25 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry goto err_root; } spin_lock(&parent->d_lock); - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - if (!__d_rcu_to_refcount(dentry, nd->seq)) - goto err; - /* - * If the sequence check on the child dentry passed, then the child has - * not been removed from its parent. This means the parent dentry must - * be valid and able to take a reference at this point. - */ - BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent); - BUG_ON(!parent->d_count); - parent->d_count++; - spin_unlock(&dentry->d_lock); + if (!dentry) { + if (!__d_rcu_to_refcount(parent, nd->seq)) + goto err_parent; + BUG_ON(nd->inode != parent->d_inode); + } else { + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + if (!__d_rcu_to_refcount(dentry, nd->seq)) + goto err_child; + /* + * If the sequence check on the child dentry passed, then + * the child has not been removed from its parent. This + * means the parent dentry must be valid and able to take + * a reference at this point. + */ + BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent); + BUG_ON(!parent->d_count); + parent->d_count++; + spin_unlock(&dentry->d_lock); + } spin_unlock(&parent->d_lock); if (want_root) { path_get(&nd->root); @@ -501,8 +457,10 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry br_read_unlock(vfsmount_lock); nd->flags &= ~LOOKUP_RCU; return 0; -err: + +err_child: spin_unlock(&dentry->d_lock); +err_parent: spin_unlock(&parent->d_lock); err_root: if (want_root) @@ -510,22 +468,6 @@ err_root: return -ECHILD; } -/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. 
*/ -static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) -{ - if (nd->flags & LOOKUP_RCU) { - if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) { - nd->flags &= ~LOOKUP_RCU; - if (!(nd->flags & LOOKUP_ROOT)) - nd->root.mnt = NULL; - rcu_read_unlock(); - br_read_unlock(vfsmount_lock); - return -ECHILD; - } - } - return 0; -} - /** * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk * @nd: nameidata pathwalk data to drop @@ -1241,13 +1183,8 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, if (likely(__follow_mount_rcu(nd, path, inode, false))) return 0; unlazy: - if (dentry) { - if (nameidata_dentry_drop_rcu(nd, dentry)) - return -ECHILD; - } else { - if (nameidata_drop_rcu(nd)) - return -ECHILD; - } + if (unlazy_walk(nd, dentry)) + return -ECHILD; } else { dentry = __d_lookup(parent, name); } @@ -1303,7 +1240,7 @@ static inline int may_lookup(struct nameidata *nd) int err = exec_permission(nd->inode, IPERM_FLAG_RCU); if (err != -ECHILD) return err; - if (nameidata_drop_rcu(nd)) + if (unlazy_walk(nd, NULL)) return -ECHILD; } return exec_permission(nd->inode, 0); @@ -1357,8 +1294,12 @@ static inline int walk_component(struct nameidata *nd, struct path *path, return -ENOENT; } if (unlikely(inode->i_op->follow_link) && follow) { - if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry)) - return -ECHILD; + if (nd->flags & LOOKUP_RCU) { + if (unlikely(unlazy_walk(nd, path->dentry))) { + terminate_walk(nd); + return -ECHILD; + } + } BUG_ON(inode != path->dentry->d_inode); return 1; } -- cgit v0.10.2 From 9f1fafee9e42b73beb3aa51ab2d6a19bfddeb5fe Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2011 11:00:12 -0400 Subject: merge handle_reval_dot and nameidata_drop_rcu_last new helper: complete_walk(). Done on successful completion of walk, drops out of RCU mode, does d_revalidate of final result if that hadn't been done already. 
handle_reval_dot() and nameidata_drop_rcu_last() subsumed into that one; callers converted to use of complete_walk(). Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 1039cba..9f59431 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -469,43 +469,6 @@ err_root: } /** - * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk - * @nd: nameidata pathwalk data to drop - * Returns: 0 on success, -ECHILD on failure - * - * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk. - * nd->path should be the final element of the lookup, so nd->root is discarded. - * Must be called from rcu-walk context. - */ -static int nameidata_drop_rcu_last(struct nameidata *nd) -{ - struct dentry *dentry = nd->path.dentry; - - BUG_ON(!(nd->flags & LOOKUP_RCU)); - nd->flags &= ~LOOKUP_RCU; - if (!(nd->flags & LOOKUP_ROOT)) - nd->root.mnt = NULL; - spin_lock(&dentry->d_lock); - if (!__d_rcu_to_refcount(dentry, nd->seq)) - goto err_unlock; - BUG_ON(nd->inode != dentry->d_inode); - spin_unlock(&dentry->d_lock); - - mntget(nd->path.mnt); - - rcu_read_unlock(); - br_read_unlock(vfsmount_lock); - - return 0; - -err_unlock: - spin_unlock(&dentry->d_lock); - rcu_read_unlock(); - br_read_unlock(vfsmount_lock); - return -ECHILD; -} - -/** * release_open_intent - free up open intent resources * @nd: pointer to nameidata */ @@ -548,26 +511,39 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd) return dentry; } -/* - * handle_reval_path - force revalidation of a dentry - * - * In some situations the path walking code will trust dentries without - * revalidating them. This causes problems for filesystems that depend on - * d_revalidate to handle file opens (e.g. NFSv4). When FS_REVAL_DOT is set - * (which indicates that it's possible for the dentry to go stale), force - * a d_revalidate call before proceeding. 
+/** + * complete_walk - successful completion of path walk + * @nd: pointer nameidata * - * Returns 0 if the revalidation was successful. If the revalidation fails, - * either return the error returned by d_revalidate or -ESTALE if the - * revalidation it just returned 0. If d_revalidate returns 0, we attempt to - * invalidate the dentry. It's up to the caller to handle putting references - * to the path if necessary. + * If we had been in RCU mode, drop out of it and legitimize nd->path. + * Revalidate the final result, unless we'd already done that during + * the path walk or the filesystem doesn't ask for it. Return 0 on + * success, -error on failure. In case of failure caller does not + * need to drop nd->path. */ -static inline int handle_reval_path(struct nameidata *nd) +static int complete_walk(struct nameidata *nd) { struct dentry *dentry = nd->path.dentry; int status; + if (nd->flags & LOOKUP_RCU) { + nd->flags &= ~LOOKUP_RCU; + if (!(nd->flags & LOOKUP_ROOT)) + nd->root.mnt = NULL; + spin_lock(&dentry->d_lock); + if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { + spin_unlock(&dentry->d_lock); + rcu_read_unlock(); + br_read_unlock(vfsmount_lock); + return -ECHILD; + } + BUG_ON(nd->inode != dentry->d_inode); + spin_unlock(&dentry->d_lock); + mntget(nd->path.mnt); + rcu_read_unlock(); + br_read_unlock(vfsmount_lock); + } + if (likely(!(nd->flags & LOOKUP_JUMPED))) return 0; @@ -585,6 +561,7 @@ static inline int handle_reval_path(struct nameidata *nd) if (!status) status = -ESTALE; + path_put(&nd->path); return status; } @@ -1598,18 +1575,8 @@ static int path_lookupat(int dfd, const char *name, } } - if (nd->flags & LOOKUP_RCU) { - /* went all way through without dropping RCU */ - BUG_ON(err); - if (nameidata_drop_rcu_last(nd)) - err = -ECHILD; - } - - if (!err) { - err = handle_reval_path(nd); - if (err) - path_put(&nd->path); - } + if (!err) + err = complete_walk(nd); if (!err && nd->flags & LOOKUP_DIRECTORY) { if (!nd->inode->i_op->lookup) { @@ 
-2075,13 +2042,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, return ERR_PTR(error); /* fallthrough */ case LAST_ROOT: - if (nd->flags & LOOKUP_RCU) { - if (nameidata_drop_rcu_last(nd)) - return ERR_PTR(-ECHILD); - } - error = handle_reval_path(nd); + error = complete_walk(nd); if (error) - goto exit; + return ERR_PTR(error); audit_inode(pathname, nd->path.dentry); if (open_flag & O_CREAT) { error = -EISDIR; @@ -2089,10 +2052,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, } goto ok; case LAST_BIND: - /* can't be RCU mode here */ - error = handle_reval_path(nd); + error = complete_walk(nd); if (error) - goto exit; + return ERR_PTR(error); audit_inode(pathname, dir); goto ok; } @@ -2111,10 +2073,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (error) /* symlink */ return NULL; /* sayonara */ - if (nd->flags & LOOKUP_RCU) { - if (nameidata_drop_rcu_last(nd)) - return ERR_PTR(-ECHILD); - } + error = complete_walk(nd); + if (error) + return ERR_PTR(-ECHILD); error = -ENOTDIR; if (nd->flags & LOOKUP_DIRECTORY) { @@ -2126,11 +2087,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, } /* create side of things */ - - if (nd->flags & LOOKUP_RCU) { - if (nameidata_drop_rcu_last(nd)) - return ERR_PTR(-ECHILD); - } + error = complete_walk(nd); + if (error) + return ERR_PTR(error); audit_inode(pathname, dir); error = -EISDIR; -- cgit v0.10.2 From 79fead47c51b874a26cf5865408a5b784b52a929 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 24 May 2011 20:12:08 +0200 Subject: exportfs: reallow building as a module Commit 990d6c2d7aee921e3bce22b2d6a750fd552262be ("vfs: Add name to file handle conversion support") changed EXPORTFS to be a bool. This was needed for earlier revisions of the original patch, but the actual commit put the code needing it into its own file that only gets compiled when FHANDLE is selected which in turn selects EXPORTFS. 
So EXPORTFS can be safely compiled as a module when not selecting FHANDLE. Signed-off-by: Jonas Gorski Acked-by: Aneesh Kumar K.V Signed-off-by: Al Viro diff --git a/fs/Kconfig b/fs/Kconfig index f3aa9b0..efb7d4e 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -47,7 +47,7 @@ config FS_POSIX_ACL def_bool n config EXPORTFS - bool + tristate config FILE_LOCKING bool "Enable POSIX file locking API" if EXPERT -- cgit v0.10.2 From 7c6e984dfca8ff5b04d359a59b24f39a691b87d3 Mon Sep 17 00:00:00 2001 From: Roman Borisov Date: Wed, 25 May 2011 16:26:48 -0700 Subject: fs/namespace.c: bound mount propagation fix This issue was discovered by users of busybox. The bug matters to busybox users; I don't know how it affects others. Apparently, mount is called with and without MS_SILENT, and this affects mount() behaviour. But MS_SILENT is only supposed to affect kernel logging verbosity. The following script was run in an empty test directory: mkdir -p mount.dir mount.shared1 mount.shared2 touch mount.dir/a mount.dir/b mount -vv --bind mount.shared1 mount.shared1 mount -vv --make-rshared mount.shared1 mount -vv --bind mount.shared2 mount.shared2 mount -vv --make-rshared mount.shared2 mount -vv --bind mount.shared2 mount.shared1 mount -vv --bind mount.dir mount.shared2 ls -R mount.dir mount.shared1 mount.shared2 umount mount.dir mount.shared1 mount.shared2 2>/dev/null umount mount.dir mount.shared1 mount.shared2 2>/dev/null umount mount.dir mount.shared1 mount.shared2 2>/dev/null rm -f mount.dir/a mount.dir/b mount.dir/c rmdir mount.dir mount.shared1 mount.shared2 mount -vv was used to show the mount() call arguments and result. 
Output shows that flag argument has 0x00008000 = MS_SILENT bit: mount: mount('mount.shared1','mount.shared1','(null)',0x00009000,'(null)'):0 mount: mount('','mount.shared1','',0x0010c000,''):0 mount: mount('mount.shared2','mount.shared2','(null)',0x00009000,'(null)'):0 mount: mount('','mount.shared2','',0x0010c000,''):0 mount: mount('mount.shared2','mount.shared1','(null)',0x00009000,'(null)'):0 mount: mount('mount.dir','mount.shared2','(null)',0x00009000,'(null)'):0 mount.dir: a b mount.shared1: mount.shared2: a b After adding --loud option to remove MS_SILENT bit from just one mount cmd: mkdir -p mount.dir mount.shared1 mount.shared2 touch mount.dir/a mount.dir/b mount -vv --bind mount.shared1 mount.shared1 2>&1 mount -vv --make-rshared mount.shared1 2>&1 mount -vv --bind mount.shared2 mount.shared2 2>&1 mount -vv --loud --make-rshared mount.shared2 2>&1 # <-HERE mount -vv --bind mount.shared2 mount.shared1 2>&1 mount -vv --bind mount.dir mount.shared2 2>&1 ls -R mount.dir mount.shared1 mount.shared2 2>&1 umount mount.dir mount.shared1 mount.shared2 2>/dev/null umount mount.dir mount.shared1 mount.shared2 2>/dev/null umount mount.dir mount.shared1 mount.shared2 2>/dev/null rm -f mount.dir/a mount.dir/b mount.dir/c rmdir mount.dir mount.shared1 mount.shared2 The result is different now - look closely at mount.shared1 directory listing. 
Now it does show files 'a' and 'b': mount: mount('mount.shared1','mount.shared1','(null)',0x00009000,'(null)'):0 mount: mount('','mount.shared1','',0x0010c000,''):0 mount: mount('mount.shared2','mount.shared2','(null)',0x00009000,'(null)'):0 mount: mount('','mount.shared2','',0x00104000,''):0 mount: mount('mount.shared2','mount.shared1','(null)',0x00009000,'(null)'):0 mount: mount('mount.dir','mount.shared2','(null)',0x00009000,'(null)'):0 mount.dir: a b mount.shared1: a b mount.shared2: a b The analysis shows that the MS_SILENT flag, which is ON by default in any busybox->mount operation, comes to the flags_to_propagation_type() function and causes an error return at the is_power_of_2() check, because the function expects only one bit set. This makes it impossible to do busybox->mount with any of the --make-[r]shared, --make-[r]private etc. options. Moreover, the recently added flags_to_propagation_type() function doesn't allow us to do such operations as --make-[r]private --make-[r]shared etc. when MS_SILENT is on. The idea of clearing the MS_SILENT flag came from Denys Vlasenko. 
Signed-off-by: Roman Borisov Reported-by: Denys Vlasenko Cc: Chuck Ebbert Cc: Alexander Shishkin Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Al Viro diff --git a/fs/namespace.c b/fs/namespace.c index d99bcf5..fe59bd1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1695,7 +1695,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path) static int flags_to_propagation_type(int flags) { - int type = flags & ~MS_REC; + int type = flags & ~(MS_REC | MS_SILENT); /* Fail if any non-propagation flags are set */ if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) -- cgit v0.10.2 From 24da4fab5a617ecbf0f0c64e7ba7703383faa411 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 24 May 2011 00:23:34 +0200 Subject: vfs: Create __block_page_mkwrite() helper passing error values back Create a __block_page_mkwrite() helper which does everything block_page_mkwrite() does, except that it passes back errors from the __block_write_begin() / block_commit_write() calls. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro diff --git a/fs/buffer.c b/fs/buffer.c index a08bb8e..f6ad8f9 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2332,23 +2332,22 @@ EXPORT_SYMBOL(block_commit_write); * beyond EOF, then the page is guaranteed safe against truncation until we * unlock the page. 
*/ -int -block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, - get_block_t get_block) +int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, + get_block_t get_block) { struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; unsigned long end; loff_t size; - int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ + int ret; lock_page(page); size = i_size_read(inode); if ((page->mapping != inode->i_mapping) || (page_offset(page) > size)) { - /* page got truncated out from underneath us */ - unlock_page(page); - goto out; + /* We overload EFAULT to mean page got truncated */ + ret = -EFAULT; + goto out_unlock; } /* page is wholly or partially inside EOF */ @@ -2361,18 +2360,22 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, if (!ret) ret = block_commit_write(page, 0, end); - if (unlikely(ret)) { - unlock_page(page); - if (ret == -ENOMEM) - ret = VM_FAULT_OOM; - else /* -ENOSPC, -EIO, etc */ - ret = VM_FAULT_SIGBUS; - } else - ret = VM_FAULT_LOCKED; - -out: + if (unlikely(ret < 0)) + goto out_unlock; + return 0; +out_unlock: + unlock_page(page); return ret; } +EXPORT_SYMBOL(__block_page_mkwrite); + +int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, + get_block_t get_block) +{ + int ret = __block_page_mkwrite(vma, vmf, get_block); + + return block_page_mkwrite_return(ret); +} EXPORT_SYMBOL(block_page_mkwrite); /* diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index f5df235..2bf6a91 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -217,8 +217,22 @@ int cont_write_begin(struct file *, struct address_space *, loff_t, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); +int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, + get_block_t get_block); int 
block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); +/* Convert errno to return value from ->page_mkwrite() call */ +static inline int block_page_mkwrite_return(int err) +{ + if (err == 0) + return VM_FAULT_LOCKED; + if (err == -EFAULT) + return VM_FAULT_NOPAGE; + if (err == -ENOMEM) + return VM_FAULT_OOM; + /* -ENOSPC, -EDQUOT, -EIO ... */ + return VM_FAULT_SIGBUS; +} sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int block_truncate_page(struct address_space *, loff_t, get_block_t *); int nobh_write_begin(struct address_space *, loff_t, unsigned, unsigned, -- cgit v0.10.2 From ea13a86463fd0c26c2c209c53dc46b8eff81bad4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 24 May 2011 00:23:35 +0200 Subject: vfs: Block mmapped writes while the fs is frozen We should not allow file modification via mmap while the filesystem is frozen. So block in block_page_mkwrite() while the filesystem is frozen. We cannot do the blocking wait in __block_page_mkwrite() since e.g. ext4 will want to call that function with transaction started in some cases and that would deadlock. But we can at least do the non-blocking reliable check in __block_page_mkwrite() which is the hardest part anyway. We have to check for frozen filesystem with the page marked dirty and under page lock with which we then return from ->page_mkwrite(). Only that way we cannot race with writeback done by freezing code - either we mark the page dirty after the writeback has started, see freezing in progress and block, or writeback will wait for our page lock which is released only when the fault is done and then writeback will writeout and writeprotect the page again. 
Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro diff --git a/fs/buffer.c b/fs/buffer.c index f6ad8f9..b0675bf 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2331,6 +2331,9 @@ EXPORT_SYMBOL(block_commit_write); * page lock we can determine safely if the page is beyond EOF. If it is not * beyond EOF, then the page is guaranteed safe against truncation until we * unlock the page. + * + * Direct callers of this function should call vfs_check_frozen() so that page + * fault does not busyloop until the fs is thawed. */ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) @@ -2362,6 +2365,18 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, if (unlikely(ret < 0)) goto out_unlock; + /* + * Freezing in progress? We check after the page is marked dirty and + * with page lock held so if the test here fails, we are sure freezing + * code will wait during syncing until the page fault is done - at that + * point page will be dirty and unlocked so freezing code will write it + * and writeprotect it again. + */ + set_page_dirty(page); + if (inode->i_sb->s_frozen != SB_UNFROZEN) { + ret = -EAGAIN; + goto out_unlock; + } return 0; out_unlock: unlock_page(page); @@ -2372,8 +2387,15 @@ EXPORT_SYMBOL(__block_page_mkwrite); int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) { - int ret = __block_page_mkwrite(vma, vmf, get_block); + int ret; + struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb; + /* + * This check is racy but catches the common case. The check in + * __block_page_mkwrite() is reliable. 
+ */ + vfs_check_frozen(sb, SB_FREEZE_WRITE); + ret = __block_page_mkwrite(vma, vmf, get_block); return block_page_mkwrite_return(ret); } EXPORT_SYMBOL(block_page_mkwrite); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 2bf6a91..503c8a6 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -230,6 +230,8 @@ static inline int block_page_mkwrite_return(int err) return VM_FAULT_NOPAGE; if (err == -ENOMEM) return VM_FAULT_OOM; + if (err == -EAGAIN) + return VM_FAULT_RETRY; /* -ENOSPC, -EDQUOT, -EIO ... */ return VM_FAULT_SIGBUS; } -- cgit v0.10.2 From 48293699a09324d2e3c66bd53d10eed6d67937a0 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 24 May 2011 13:06:04 -0700 Subject: vfs: dentry_unhash immediately prior to rmdir This presumes that there is no reason to unhash a dentry if we fail because it is a mountpoint or the LSM check fails, and that the LSM checks do not depend on the dentry being unhashed. Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 9f59431..af51199 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2564,24 +2564,24 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) return -EPERM; mutex_lock(&dentry->d_inode->i_mutex); - dentry_unhash(dentry); if (d_mountpoint(dentry)) error = -EBUSY; else { error = security_inode_rmdir(dir, dentry); if (!error) { + dentry_unhash(dentry); error = dir->i_op->rmdir(dir, dentry); if (!error) { dentry->d_inode->i_flags |= S_DEAD; dont_mount(dentry); } + dput(dentry); } } mutex_unlock(&dentry->d_inode->i_mutex); if (!error) { d_delete(dentry); } - dput(dentry); return error; } -- cgit v0.10.2 From 64252c75a2196a0cf1e0d3777143ecfe0e3ae650 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 24 May 2011 13:06:05 -0700 Subject: vfs: remove dget() from dentry_unhash() This serves no useful purpose that I can discern. All callers (rename, rmdir) hold their own reference to the dentry. 
A quick audit of all file systems showed no relevant checks on the value of d_count in vfs_rmdir/vfs_rename_dir paths. Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index d5f8c8a..b1c72a9 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -414,7 +414,6 @@ again: mutex_unlock(&hpfs_i(inode)->i_parent_mutex); dentry_unhash(dentry); if (!d_unhashed(dentry)) { - dput(dentry); hpfs_unlock(dir->i_sb); return -ENOSPC; } @@ -422,7 +421,6 @@ again: !S_ISREG(inode->i_mode) || get_write_access(inode)) { d_rehash(dentry); - dput(dentry); } else { struct iattr newattrs; /*printk("HPFS: truncating file before delete.\n");*/ @@ -430,7 +428,6 @@ again: newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; err = notify_change(dentry, &newattrs); put_write_access(inode); - dput(dentry); if (!err) goto again; } diff --git a/fs/namei.c b/fs/namei.c index af51199..8d11187 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2545,10 +2545,9 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, int, mode) */ void dentry_unhash(struct dentry *dentry) { - dget(dentry); shrink_dcache_parent(dentry); spin_lock(&dentry->d_lock); - if (dentry->d_count == 2) + if (dentry->d_count == 1) __d_drop(dentry); spin_unlock(&dentry->d_lock); } @@ -2575,7 +2574,6 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_flags |= S_DEAD; dont_mount(dentry); } - dput(dentry); } } mutex_unlock(&dentry->d_inode->i_mutex); @@ -3002,7 +3000,6 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, mutex_unlock(&target->i_mutex); if (d_unhashed(new_dentry)) d_rehash(new_dentry); - dput(new_dentry); } if (!error) if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 47d2a44..50f1abc 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -105,7 +105,6 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry) mutex_unlock(&dentry->d_inode->i_mutex); 
if (!error) d_delete(dentry); - dput(dentry); return error; } -- cgit v0.10.2 From 79bf7c732b5ff75b96022ed9d29181afd3d2509c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 24 May 2011 13:06:06 -0700 Subject: vfs: push dentry_unhash on rmdir into file systems Only a few file systems need this. Start by pushing it down into each fs rmdir method (except gfs2 and xfs) so it can be dealt with on a per-fs basis. This does not change behavior for any in-tree file systems. Acked-by: Christoph Hellwig Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 7f6c677..ecd7717 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -814,6 +814,7 @@ int v9fs_vfs_unlink(struct inode *i, struct dentry *d) int v9fs_vfs_rmdir(struct inode *i, struct dentry *d) { + dentry_unhash(d); return v9fs_remove(i, d, 1); } diff --git a/fs/affs/namei.c b/fs/affs/namei.c index e3e9efc..d087153 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -320,6 +320,8 @@ affs_rmdir(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_ino, (int)dentry->d_name.len, dentry->d_name.name); + dentry_unhash(dentry); + return affs_remove_header(dentry); } diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 20c106f..9a7f414 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -845,6 +845,8 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) _enter("{%x:%u},{%s}", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); + dentry_unhash(dentry); + ret = -ENAMETOOLONG; if (dentry->d_name.len >= AFSNAMEMAX) goto error; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index f55ae23..87d95a8 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -583,6 +583,8 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) return -EACCES; + dentry_unhash(dentry); + if (atomic_dec_and_test(&ino->count)) { p_ino = autofs4_dentry_ino(dentry->d_parent); if (p_ino && dentry->d_parent != 
dentry) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7cd8ab0..c692dad 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3062,6 +3062,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) return -ENOTEMPTY; + dentry_unhash(dentry); + trans = __unlink_start_trans(dir, dentry); if (IS_ERR(trans)) return PTR_ERR(trans); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 1a867a3..d2e5490 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -827,6 +827,9 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) int err = -EROFS; int op; + if ((dentry->d_inode->i_mode & S_IFMT) == S_IFDIR) + dentry_unhash(dentry); + if (ceph_snap(dir) == CEPH_SNAPDIR) { /* rmdir .snap/foo is RMSNAP */ dout("rmsnap dir %p '%.*s' dn %p\n", dir, dentry->d_name.len, diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 8852470..cee5896 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1461,6 +1461,8 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) cFYI(1, "cifs_rmdir, inode = 0x%p", inode); + dentry_unhash(direntry); + xid = GetXid(); full_path = build_path_from_dentry(direntry); diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 2b8dae4..9f72b75 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -336,6 +336,8 @@ static int coda_rmdir(struct inode *dir, struct dentry *de) int len = de->d_name.len; int error; + dentry_unhash(de); + error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len); if (!error) { /* VFS may delete the child */ diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 3313dd1..9908c20 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1355,6 +1355,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) struct module *subsys_owner = NULL, *dead_item_owner = NULL; int ret; + dentry_unhash(dentry); + if (dentry->d_parent == configfs_sb->s_root) return -EPERM; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 4d4cc6a..c88612f 100644 --- 
a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -521,6 +521,8 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) struct dentry *lower_dir_dentry; int rc; + dentry_unhash(dentry); + lower_dentry = ecryptfs_dentry_to_lower(dentry); dget(dentry); lower_dir_dentry = lock_parent(lower_dentry); diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index 4d70db1..0697175 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c @@ -227,6 +227,8 @@ static int exofs_rmdir(struct inode *dir, struct dentry *dentry) struct inode *inode = dentry->d_inode; int err = -ENOTEMPTY; + dentry_unhash(dentry); + if (exofs_empty_dir(inode)) { err = exofs_unlink(dir, dentry); if (!err) { diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index ed5c5d4..7a5ad97 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -296,6 +296,8 @@ static int ext2_rmdir (struct inode * dir, struct dentry *dentry) struct inode * inode = dentry->d_inode; int err = -ENOTEMPTY; + dentry_unhash(dentry); + if (ext2_empty_dir(inode)) { err = ext2_unlink(dir, dentry); if (!err) { diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 32f3b86..552f94d 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -2074,6 +2074,8 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry) struct ext3_dir_entry_2 * de; handle_t *handle; + dentry_unhash(dentry); + /* Initialize quotas before so that eventual writes go in * separate transaction */ dquot_initialize(dir); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 67fd0b0..957580d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2123,6 +2123,8 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) struct ext4_dir_entry_2 *de; handle_t *handle; + dentry_unhash(dentry); + /* Initialize quotas before so that eventual writes go in * separate transaction */ dquot_initialize(dir); diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 7114990..0c25cea 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -326,6 +326,8 @@ 
static int msdos_rmdir(struct inode *dir, struct dentry *dentry) struct fat_slot_info sinfo; int err; + dentry_unhash(dentry); + lock_super(sb); /* * Check whether the directory is not in use, then check diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index adae3fb..d7b9383 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -824,6 +824,8 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) struct fat_slot_info sinfo; int err; + dentry_unhash(dentry); + lock_super(sb); err = fat_dir_empty(inode); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c6ba49b..40d5c2a 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -667,6 +667,8 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) if (IS_ERR(req)) return PTR_ERR(req); + dentry_unhash(entry); + req->in.h.opcode = FUSE_RMDIR; req->in.h.nodeid = get_node_id(dir); req->in.numargs = 1; diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index b4d70b1..616cfe0 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -253,6 +253,9 @@ static int hfs_remove(struct inode *dir, struct dentry *dentry) struct inode *inode = dentry->d_inode; int res; + if (S_ISDIR(inode->i_mode)) + dentry_unhash(dentry); + if (S_ISDIR(inode->i_mode) && inode->i_size != 2) return -ENOTEMPTY; res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name); diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 4df5059..23451a9 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -370,6 +370,8 @@ static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry) struct inode *inode = dentry->d_inode; int res; + dentry_unhash(dentry); + if (inode->i_size != 2) return -ENOTEMPTY; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 2638c834e..73ea3ba 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -683,6 +683,8 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry) char *file; int err; + dentry_unhash(dentry); + if ((file = dentry_name(dentry)) == NULL) return -ENOMEM; err = do_rmdir(file); 
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index b1c72a9..b9fe158 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -461,6 +461,8 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry) int err; int r; + dentry_unhash(dentry); + hpfs_adjust_length(name, &len); hpfs_lock(dir->i_sb); mutex_lock(&hpfs_i(inode)->i_parent_mutex); diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 82faddd..727d644 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -609,6 +609,8 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) int ret; uint32_t now = get_seconds(); + dentry_unhash(dentry); + for (fd = f->dents ; fd; fd = fd->next) { if (fd->ino) return -ENOTEMPTY; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index eaaf2b5..0569dac 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -360,6 +360,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name); + dentry_unhash(dentry); + /* Init inode for quota operations. 
*/ dquot_initialize(dip); dquot_initialize(ip); diff --git a/fs/libfs.c b/fs/libfs.c index c88eab5..1e2ba5a 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -311,6 +311,8 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry) if (!simple_empty(dentry)) return -ENOTEMPTY; + dentry_unhash(dentry); + drop_nlink(dentry->d_inode); simple_unlink(dir, dentry); drop_nlink(dir); diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 9ed89d1..2b32734 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -273,6 +273,8 @@ static int logfs_rmdir(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; + dentry_unhash(dentry); + if (!logfs_empty_dir(inode)) return -ENOTEMPTY; diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 6e6777f..091626f 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -168,6 +168,8 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry) struct inode * inode = dentry->d_inode; int err = -ENOTEMPTY; + dentry_unhash(dentry); + if (minix_empty_dir(inode)) { err = minix_unlink(dir, dentry); if (!err) { diff --git a/fs/namei.c b/fs/namei.c index 8d11187..596edb5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2568,7 +2568,6 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) else { error = security_inode_rmdir(dir, dentry); if (!error) { - dentry_unhash(dentry); error = dir->i_op->rmdir(dir, dentry); if (!error) { dentry->d_inode->i_flags |= S_DEAD; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index f6946bb..57336b7 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -1033,6 +1033,8 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry) DPRINTK("ncp_rmdir: removing %s/%s\n", dentry->d_parent->d_name.name, dentry->d_name.name); + dentry_unhash(dentry); + error = -EBUSY; if (!d_unhashed(dentry)) goto out; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 7237672..48483b5 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1748,6 +1748,8 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) 
dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + dentry_unhash(dentry); + error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); /* Ensure the VFS deletes this inode */ if (error == 0 && dentry->d_inode != NULL) diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 546849b..78306e6 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -334,6 +334,8 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) struct nilfs_transaction_info ti; int err; + dentry_unhash(dentry); + err = nilfs_transaction_begin(dir->i_sb, &ti, 0); if (err) return err; diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index e5d738c..b017ebb 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -810,6 +810,9 @@ static int ocfs2_unlink(struct inode *dir, (unsigned long long)OCFS2_I(dir)->ip_blkno, (unsigned long long)OCFS2_I(inode)->ip_blkno); + if (S_ISDIR(inode->i_mode)) + dentry_unhash(dentry); + dquot_initialize(dir); BUG_ON(dentry->d_parent->d_inode != dir); diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index de4ff29..95ef443 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -240,8 +240,12 @@ static int omfs_remove(struct inode *dir, struct dentry *dentry) struct inode *inode = dentry->d_inode; int ret; - if (S_ISDIR(inode->i_mode) && !omfs_dir_is_empty(inode)) - return -ENOTEMPTY; + + if (S_ISDIR(inode->i_mode)) { + dentry_unhash(dentry); + if (!omfs_dir_is_empty(inode)) + return -ENOTEMPTY; + } ret = omfs_delete_entry(dentry); if (ret) diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 1186626..43e94f0 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -831,6 +831,8 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) INITIALIZE_PATH(path); struct reiserfs_dir_entry de; + dentry_unhash(dentry); + /* we will be doing 2 balancings and update 2 stat data, we change quotas * of the owner of the directory and of the owner of the parent directory. 
* The quota structure is possibly deleted only on last iput => outside diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index e474fbc..fac64ac 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -196,6 +196,8 @@ static int sysv_rmdir(struct inode * dir, struct dentry * dentry) struct inode *inode = dentry->d_inode; int err = -ENOTEMPTY; + dentry_unhash(dentry); + if (sysv_empty_dir(inode)) { err = sysv_unlink(dir, dentry); if (!err) { diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 7217d67..6ca9176 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -656,6 +656,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) struct ubifs_inode *dir_ui = ubifs_inode(dir); struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; + dentry_unhash(dentry); + /* * Budget request settings: deletion direntry, deletion inode and * changing the parent inode. If budgeting fails, go ahead anyway diff --git a/fs/udf/namei.c b/fs/udf/namei.c index f1dce84..b70f026 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -783,6 +783,8 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) struct fileIdentDesc *fi, cfi; struct kernel_lb_addr tloc; + dentry_unhash(dentry); + retval = -ENOENT; fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); if (!fi) diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 29309e2..3a769d5 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -258,6 +258,8 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry) struct inode * inode = dentry->d_inode; int err= -ENOTEMPTY; + dentry_unhash(dentry); + lock_ufs(dir->i_sb); if (ufs_empty_dir (inode)) { err = ufs_unlink(dir, dentry); -- cgit v0.10.2 From e4eaac06bcccb2a70bca6a2de9871882dce2aa14 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 24 May 2011 13:06:07 -0700 Subject: vfs: push dentry_unhash on rename_dir into file systems Only a few file systems need this. 
Start by pushing it down into each rename method (except gfs2 and xfs) so that it can be dealt with on a per-fs basis. Acked-by: Christoph Hellwig Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index ecd7717..8d7f3e6 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -840,6 +840,9 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct p9_fid *newdirfid; struct p9_wstat wstat; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + P9_DPRINTK(P9_DEBUG_VFS, "\n"); retval = 0; old_inode = old_dentry->d_inode; diff --git a/fs/affs/namei.c b/fs/affs/namei.c index d087153..03330e2 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -419,6 +419,9 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry, struct buffer_head *bh = NULL; int retval; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + pr_debug("AFFS: rename(old=%u,\"%*s\" to new=%u,\"%*s\")\n", (u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name, (u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 9a7f414..2c4e051 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1148,6 +1148,9 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, struct key *key; int ret; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + vnode = AFS_FS_I(old_dentry->d_inode); orig_dvnode = AFS_FS_I(old_dir); new_dvnode = AFS_FS_I(new_dir); diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index b14cebf..c7d1d06 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -224,6 +224,9 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct bfs_sb_info *info; int error = -ENOENT; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + old_bh = new_bh = NULL; 
old_inode = old_dentry->d_inode; if (S_ISDIR(old_inode->i_mode)) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c692dad..3a33ae3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6994,6 +6994,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, u64 root_objectid; int ret; + if (new_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) return -EPERM; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index d2e5490..377b964 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -869,6 +869,9 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, struct ceph_mds_request *req; int err; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + if (ceph_snap(old_dir) != ceph_snap(new_dir)) return -EXDEV; if (ceph_snap(old_dir) != CEPH_NOSNAP || diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index cee5896..18546b7 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1571,6 +1571,9 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, FILE_UNIX_BASIC_INFO *info_buf_target; int xid, rc, tmprc; + if (target_dentry->d_inode && S_ISDIR(target_dentry->d_inode->i_mode)) + dentry_unhash(target_dentry); + cifs_sb = CIFS_SB(source_dir->i_sb); tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 9f72b75..a46126f 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -361,6 +361,9 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, int new_length = new_dentry->d_name.len; int error; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + error = venus_rename(old_dir->i_sb, coda_i2f(old_dir), coda_i2f(new_dir), old_length, new_length, (const char *) old_name, (const char *)new_name); diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index c88612f..227b409 100644 --- 
a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -573,6 +573,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *lower_new_dir_dentry; struct dentry *trap = NULL; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); dget(lower_old_dentry); diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index 0697175..de252e5 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c @@ -251,6 +251,9 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry, struct exofs_dir_entry *old_de; int err = -ENOENT; + if (new_inode && S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + old_de = exofs_find_entry(old_dir, old_dentry, &old_page); if (!old_de) goto out; diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 7a5ad97..516c31d 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -320,6 +320,9 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, struct ext2_dir_entry_2 * old_de; int err = -ENOENT; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + dquot_initialize(old_dir); dquot_initialize(new_dir); diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 552f94d..f89b1d4 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -2298,6 +2298,9 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, struct ext3_dir_entry_2 * old_de, * new_de; int retval, flush_file = 0; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + dquot_initialize(old_dir); dquot_initialize(new_dir); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 957580d..792d06e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2352,6 +2352,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, struct ext4_dir_entry_2 *old_de, *new_de; int retval, 
force_da_alloc = 0; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + dquot_initialize(old_dir); dquot_initialize(new_dir); diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 0c25cea..c3eccbd 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -459,6 +459,9 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, old_inode = old_dentry->d_inode; new_inode = new_dentry->d_inode; + if (new_inode && S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + err = fat_scan(old_dir, old_name, &old_sinfo); if (err) { err = -EIO; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index d7b9383..e2466b2 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -933,6 +933,9 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, int err, is_dir, update_dotdot, corrupt = 0; struct super_block *sb = old_dir->i_sb; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; old_inode = old_dentry->d_inode; new_inode = new_dentry->d_inode; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 40d5c2a..e462a7a 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -693,6 +693,10 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, struct fuse_rename_in inarg; struct fuse_conn *fc = get_fuse_conn(olddir); struct fuse_req *req = fuse_get_req(fc); + + if (newent->d_inode && S_ISDIR(newent->d_inode->i_mode)) + dentry_unhash(newent); + if (IS_ERR(req)) return PTR_ERR(req); diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 616cfe0..1cb70cd 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -286,6 +286,9 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry, /* Unlink destination if it already exists */ if (new_dentry->d_inode) { + if (S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + res = hfs_remove(new_dir, new_dentry); if (res) return res; 
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 23451a9..b288350 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -469,10 +469,12 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, /* Unlink destination if it already exists */ if (new_dentry->d_inode) { - if (S_ISDIR(new_dentry->d_inode->i_mode)) + if (S_ISDIR(new_dentry->d_inode->i_mode)) { + dentry_unhash(new_dentry); res = hfsplus_rmdir(new_dir, new_dentry); - else + } else { res = hfsplus_unlink(new_dir, new_dentry); + } if (res) return res; } diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 73ea3ba..e6816b9 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -738,6 +738,9 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, char *from_name, *to_name; int err; + if (to->d_inode && S_ISDIR(to->d_inode->i_mode)) + dentry_unhash(to); + if ((from_name = dentry_name(from)) == NULL) return -ENOMEM; if ((to_name = dentry_name(to)) == NULL) { diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index b9fe158..d3db95f 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -561,6 +561,10 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct buffer_head *bh; struct fnode *fnode; int err; + + if (new_inode && S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + if ((err = hpfs_chk_name(new_name, &new_len))) return err; err = 0; hpfs_adjust_length(old_name, &old_len); diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 727d644..05f7332 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -786,6 +786,9 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, uint8_t type; uint32_t now; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + /* The VFS will check for us and prevent trying to rename a * file over a directory and vice versa, but if it's a directory, * the VFS can't check whether the victim is empty. 
The filesystem diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 0569dac..865df16 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1097,6 +1097,9 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, jfs_info("jfs_rename: %s %s", old_dentry->d_name.name, new_dentry->d_name.name); + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + dquot_initialize(old_dir); dquot_initialize(new_dir); diff --git a/fs/libfs.c b/fs/libfs.c index 1e2ba5a..91a3710 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -325,6 +325,9 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *inode = old_dentry->d_inode; int they_are_dirs = S_ISDIR(old_dentry->d_inode->i_mode); + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + if (!simple_empty(new_dentry)) return -ENOTEMPTY; diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 2b32734..f34c9cd 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -624,6 +624,9 @@ static int logfs_rename_cross(struct inode *old_dir, struct dentry *old_dentry, loff_t pos; int err; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + /* 1. 
locate source dd */ err = logfs_get_dd(old_dir, old_dentry, &dd, &pos); if (err) diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 091626f..f60aed8 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -192,6 +192,9 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry, struct minix_dir_entry * old_de; int err = -ENOENT; + if (new_inode && S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + old_de = minix_find_entry(old_dentry, &old_page); if (!old_de) goto out; diff --git a/fs/namei.c b/fs/namei.c index 596edb5..787ebc8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2950,12 +2950,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * HOWEVER, it relies on the assumption that any object with ->lookup() * has no more than 1 dentry. If "hybrid" objects will ever appear, * we'd better make sure that there's no link(2) for them. - * d) some filesystems don't support opened-but-unlinked directories, - * either because of layout or because they are not ready to deal with - * all cases correctly. The latter will be fixed (taking this sort of - * stuff into VFS), but the former is not going away. Solution: the same - * trick as in rmdir(). - * e) conversion from fhandle to dentry may come in the wrong moment - when + * d) conversion from fhandle to dentry may come in the wrong moment - when * we are removing the target. Solution: we will have to grab ->i_mutex * in the fhandle_to_dentry code. 
[FIXME - current nfsfh.c relies on * ->i_mutex on parents, which works but leads to some truly excessive @@ -2986,11 +2981,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, mutex_lock(&target->i_mutex); if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) error = -EBUSY; - else { - if (target) - dentry_unhash(new_dentry); + else error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); - } if (target) { if (!error) { target->i_flags |= S_DEAD; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 57336b7..e3e646b 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -1141,6 +1141,9 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, old_dentry->d_parent->d_name.name, old_dentry->d_name.name, new_dentry->d_parent->d_name.name, new_dentry->d_name.name); + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + ncp_age_dentry(server, old_dentry); ncp_age_dentry(server, new_dentry); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 48483b5..87daf79 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1959,6 +1959,9 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dentry->d_parent->d_name.name, new_dentry->d_name.name, new_dentry->d_count); + if (new_inode && S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + /* * For non-directories, check whether the target is busy and if so, * make a copy of the dentry and then do a silly-rename. 
If the diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 78306e6..1102a5f 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -371,6 +371,9 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct nilfs_transaction_info ti; int err; + if (new_inode && S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1); if (unlikely(err)) return err; diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b017ebb..f3582a6 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -1066,6 +1066,9 @@ static int ocfs2_rename(struct inode *old_dir, struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; struct ocfs2_dir_lookup_result target_insert = { NULL, }; + if (new_inode && S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + /* At some point it might be nice to break this function up a * bit. */ diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index 95ef443..c368360c 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -382,6 +382,9 @@ static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry, int err; if (new_inode) { + if (S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + /* overwriting existing file/dir */ err = omfs_remove(new_dir, new_dentry); if (err) diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 43e94f0..76c8164 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -1227,6 +1227,9 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, unsigned long savelink = 1; struct timespec ctime; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + /* three balancings: (1) old name removal, (2) new name insertion and (3) maybe "save" link insertion stat data updates: (1) old directory, diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index fac64ac..e2cc675 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -224,6 +224,9 @@ static int sysv_rename(struct inode * old_dir, struct 
dentry * old_dentry, struct sysv_dir_entry * old_de; int err = -ENOENT; + if (new_inode && S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + old_de = sysv_find_entry(old_dentry, &old_page); if (!old_de) goto out; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 6ca9176..d80810b 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -978,6 +978,9 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; struct timespec time; + if (new_inode && S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + /* * Budget request settings: deletion direntry, new direntry, removing * the old inode, and changing old and new parent directory inodes. diff --git a/fs/udf/namei.c b/fs/udf/namei.c index b70f026..4d76594 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -1083,6 +1083,9 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, struct kernel_lb_addr tloc; struct udf_inode_info *old_iinfo = UDF_I(old_inode); + if (new_inode && S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); if (ofi) { if (ofibh.sbh != ofibh.ebh) diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 3a769d5..953ebdf 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -284,6 +284,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, struct ufs_dir_entry *old_de; int err = -ENOENT; + if (new_inode && S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_de) goto out; -- cgit v0.10.2 From a71905f0db41d4b2b01044fb40f97656fefc44a7 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 24 May 2011 13:06:08 -0700 Subject: vfs: update dentry_unhash() comment The helper is now only called by file systems, not the VFS. 
Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 787ebc8..4c5cbd5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2529,10 +2529,10 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, int, mode) } /* - * We try to drop the dentry early: we should have - * a usage count of 2 if we're the only user of this - * dentry, and if that is true (possibly after pruning - * the dcache), then we drop the dentry now. + * The dentry_unhash() helper will try to drop the dentry early: we + * should have a usage count of 2 if we're the only user of this + * dentry, and if that is true (possibly after pruning the dcache), + * then we drop the dentry now. * * A low-level filesystem can, if it choses, legally * do a -- cgit v0.10.2 From 5c5d3f3b871a719e2c6413b85009c723adeb27e1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 24 May 2011 13:06:09 -0700 Subject: libfs: drop unneeded dentry_unhash There are no libfs issues with dangling references to empty directories. 
Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/libfs.c b/fs/libfs.c index 91a3710..c88eab5 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -311,8 +311,6 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry) if (!simple_empty(dentry)) return -ENOTEMPTY; - dentry_unhash(dentry); - drop_nlink(dentry->d_inode); simple_unlink(dir, dentry); drop_nlink(dir); @@ -325,9 +323,6 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *inode = old_dentry->d_inode; int they_are_dirs = S_ISDIR(old_dentry->d_inode->i_mode); - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - if (!simple_empty(new_dentry)) return -ENOTEMPTY; -- cgit v0.10.2 From b5afd2c406f5c6272d916fd705f44f070fbbc0ba Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 24 May 2011 13:06:10 -0700 Subject: vfs: fix vfs_rename_dir for FS_RENAME_DOES_D_MOVE filesystems vfs_rename_dir() doesn't properly account for filesystems with FS_RENAME_DOES_D_MOVE. If new_dentry has a target inode attached, it unhashes the new_dentry prior to the rename() iop and rehashes it after, but doesn't account for the possibility that rename() may have swapped {old,new}_dentry. For FS_RENAME_DOES_D_MOVE filesystems, it rehashes new_dentry (now the old renamed-from name, which d_move() expected to go away), such that a subsequent lookup will find it. Currently all FS_RENAME_DOES_D_MOVE filesystems compensate for this by failing in d_revalidate. The bug was introduced by: commit 349457ccf2592c14bdf13b6706170ae2e94931b1 "[PATCH] Allow file systems to manually d_move() inside of ->rename()" Fix by not rehashing the new dentry. Rehashing used to be needed by d_move() but isn't anymore. 
Reported-by: Sage Weil Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 4c5cbd5..a1593ba 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2989,8 +2989,6 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, dont_mount(new_dentry); } mutex_unlock(&target->i_mutex); - if (d_unhashed(new_dentry)) - d_rehash(new_dentry); } if (!error) if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) -- cgit v0.10.2 From 912dbc15d953791f013b0c64a8093ab0490e5f40 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 24 May 2011 13:06:11 -0700 Subject: vfs: clean up vfs_rmdir Simplify the control flow with an out label. Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index a1593ba..18c3293 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2563,23 +2563,26 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) return -EPERM; mutex_lock(&dentry->d_inode->i_mutex); + + error = -EBUSY; if (d_mountpoint(dentry)) - error = -EBUSY; - else { - error = security_inode_rmdir(dir, dentry); - if (!error) { - error = dir->i_op->rmdir(dir, dentry); - if (!error) { - dentry->d_inode->i_flags |= S_DEAD; - dont_mount(dentry); - } - } - } + goto out; + + error = security_inode_rmdir(dir, dentry); + if (error) + goto out; + + error = dir->i_op->rmdir(dir, dentry); + if (error) + goto out; + + dentry->d_inode->i_flags |= S_DEAD; + dont_mount(dentry); + +out: mutex_unlock(&dentry->d_inode->i_mutex); - if (!error) { + if (!error) d_delete(dentry); - } - return error; } -- cgit v0.10.2 From 9055cba711891a6313232629cd6bbca7c901e07f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 24 May 2011 13:06:12 -0700 Subject: vfs: clean up vfs_rename_dir Simplify control flow through vfs_rename_dir. 
Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 18c3293..ecb16d3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2963,7 +2963,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { int error = 0; - struct inode *target; + struct inode *target = new_dentry->d_inode; /* * If we are going to change the parent - check write permissions, @@ -2979,20 +2979,24 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, if (error) return error; - target = new_dentry->d_inode; if (target) mutex_lock(&target->i_mutex); - if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) - error = -EBUSY; - else - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); + + error = -EBUSY; + if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) + goto out; + + error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); + if (error) + goto out; + if (target) { - if (!error) { - target->i_flags |= S_DEAD; - dont_mount(new_dentry); - } - mutex_unlock(&target->i_mutex); + target->i_flags |= S_DEAD; + dont_mount(new_dentry); } +out: + if (target) + mutex_unlock(&target->i_mutex); if (!error) if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) d_move(old_dentry,new_dentry); -- cgit v0.10.2 From 51892bbb57e87854c27c105317797823f8891e68 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 24 May 2011 13:06:13 -0700 Subject: vfs: clean up vfs_rename_other Simplify control flow to match vfs_rename_dir. 
Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index ecb16d3..f90f059 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3006,7 +3006,7 @@ out: static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { - struct inode *target; + struct inode *target = new_dentry->d_inode; int error; error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); @@ -3014,19 +3014,22 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, return error; dget(new_dentry); - target = new_dentry->d_inode; if (target) mutex_lock(&target->i_mutex); + + error = -EBUSY; if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) - error = -EBUSY; - else - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); - if (!error) { - if (target) - dont_mount(new_dentry); - if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) - d_move(old_dentry, new_dentry); - } + goto out; + + error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); + if (error) + goto out; + + if (target) + dont_mount(new_dentry); + if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) + d_move(old_dentry, new_dentry); +out: if (target) mutex_unlock(&target->i_mutex); dput(new_dentry); -- cgit v0.10.2 From 051e8f0ee23c9ca963e2a0208153c6cd475f05fb Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 24 May 2011 13:06:14 -0700 Subject: ceph: remove unnecessary dentry_unhash calls Ceph does not need these, and they screw up our use of the dcache as a consistent cache. 
Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 377b964..1a867a3 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -827,9 +827,6 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) int err = -EROFS; int op; - if ((dentry->d_inode->i_mode & S_IFMT) == S_IFDIR) - dentry_unhash(dentry); - if (ceph_snap(dir) == CEPH_SNAPDIR) { /* rmdir .snap/foo is RMSNAP */ dout("rmsnap dir %p '%.*s' dn %p\n", dir, dentry->d_name.len, @@ -869,9 +866,6 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, struct ceph_mds_request *req; int err; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - if (ceph_snap(old_dir) != ceph_snap(new_dir)) return -EXDEV; if (ceph_snap(old_dir) != CEPH_NOSNAP || -- cgit v0.10.2 From f64f58f85403a5a8a21f1fbbc7798e56580785cf Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 24 May 2011 13:06:15 -0700 Subject: btrfs: remove unnecessary dentry_unhash in rmdir/rename_dir Btrfs has no problems with lingering references to unlinked directory inodes. 
CC: Chris Mason CC: linux-btrfs@vger.kernel.org Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3a33ae3..7cd8ab0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3062,8 +3062,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) return -ENOTEMPTY; - dentry_unhash(dentry); - trans = __unlink_start_trans(dir, dentry); if (IS_ERR(trans)) return PTR_ERR(trans); @@ -6994,9 +6992,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, u64 root_objectid; int ret; - if (new_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) return -EPERM; -- cgit v0.10.2 From 40ebc0af58a88652c9a9c1e9126c8fcf9fce1f6e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 24 May 2011 13:06:16 -0700 Subject: ext4: remove unnecessary dentry_unhash on rmdir/rename_dir ext4 has no problems with lingering references to unlinked directory inodes. 
CC: "Theodore Ts'o" CC: Andreas Dilger CC: linux-ext4@vger.kernel.org Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 792d06e..67fd0b0 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2123,8 +2123,6 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) struct ext4_dir_entry_2 *de; handle_t *handle; - dentry_unhash(dentry); - /* Initialize quotas before so that eventual writes go in * separate transaction */ dquot_initialize(dir); @@ -2352,9 +2350,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, struct ext4_dir_entry_2 *old_de, *new_de; int retval, force_da_alloc = 0; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - dquot_initialize(old_dir); dquot_initialize(new_dir); -- cgit v0.10.2 From 5a61a245f7489feec865264e2627c5071aef8c66 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 24 May 2011 13:06:17 -0700 Subject: ext3: remove unnecessary dentry_unhash on rmdir/rename_dir ext3 has no problems with lingering references to unlinked directory inodes. 
CC: Jan Kara CC: Andrew Morton CC: Andreas Dilger CC: linux-ext4@vger.kernel.org Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index f89b1d4..32f3b86 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -2074,8 +2074,6 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry) struct ext3_dir_entry_2 * de; handle_t *handle; - dentry_unhash(dentry); - /* Initialize quotas before so that eventual writes go in * separate transaction */ dquot_initialize(dir); @@ -2298,9 +2296,6 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, struct ext3_dir_entry_2 * old_de, * new_de; int retval, flush_file = 0; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - dquot_initialize(old_dir); dquot_initialize(new_dir); -- cgit v0.10.2 From 5afcb940fa37f57d124155f30a1a8d7794a476e0 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 24 May 2011 13:06:18 -0700 Subject: ext2: remove unnecessary dentry_unhash on rmdir/rename_dir ext2 has no problems with lingering references to unlinked directory inodes. 
CC: Jan Kara CC: linux-ext4@vger.kernel.org Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 516c31d..ed5c5d4 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -296,8 +296,6 @@ static int ext2_rmdir (struct inode * dir, struct dentry *dentry) struct inode * inode = dentry->d_inode; int err = -ENOTEMPTY; - dentry_unhash(dentry); - if (ext2_empty_dir(inode)) { err = ext2_unlink(dir, dentry); if (!err) { @@ -320,9 +318,6 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, struct ext2_dir_entry_2 * old_de; int err = -ENOENT; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - dquot_initialize(old_dir); dquot_initialize(new_dir); -- cgit v0.10.2 From 052e2a1ba2d07e724ae9cc0608389292276cb77a Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 24 May 2011 13:06:19 -0700 Subject: nfs: remove unnecessary dentry_unhash on rmdir/rename_dir NFS has no problems with lingering references to unlinked directory inodes. CC: Trond Myklebust CC: linux-nfs@vger.kernel.org Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 87daf79..7237672 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1748,8 +1748,6 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - dentry_unhash(dentry); - error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); /* Ensure the VFS deletes this inode */ if (error == 0 && dentry->d_inode != NULL) @@ -1959,9 +1957,6 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dentry->d_parent->d_name.name, new_dentry->d_name.name, new_dentry->d_count); - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - /* * For non-directories, check whether the target is busy and if so, * make a copy of the dentry and then do a silly-rename. 
If the -- cgit v0.10.2 From 8cbfa53b1cd08ae3b315bbb55f7396f5a6a6a4f7 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 24 May 2011 13:06:20 -0700 Subject: exofs: remove unnecessary dentry_unhash on rmdir/rename_dir Exofs has no problems with lingering references to unlinked directory inodes. CC: Benny Halevy CC: osd-dev@open-osd.org Acked-by: Boaz Harrosh Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index de252e5..4d70db1 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c @@ -227,8 +227,6 @@ static int exofs_rmdir(struct inode *dir, struct dentry *dentry) struct inode *inode = dentry->d_inode; int err = -ENOTEMPTY; - dentry_unhash(dentry); - if (exofs_empty_dir(inode)) { err = exofs_unlink(dir, dentry); if (!err) { @@ -251,9 +249,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry, struct exofs_dir_entry *old_de; int err = -ENOENT; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - old_de = exofs_find_entry(old_dir, old_dentry, &old_page); if (!old_de) goto out; -- cgit v0.10.2 From 7ca57363880c2b321ff64d34a82ae4af86ee52d5 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 24 May 2011 13:06:21 -0700 Subject: ocfs2: remove unnecessary dentry_unhash on rmdir/rename_dir Ocfs2 has no issues with lingering references to unlinked directory inodes. 
CC: Mark Fasheh CC: ocfs2-devel@oss.oracle.com Acked-by: Joel Becker Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index f3582a6..e5d738c 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -810,9 +810,6 @@ static int ocfs2_unlink(struct inode *dir, (unsigned long long)OCFS2_I(dir)->ip_blkno, (unsigned long long)OCFS2_I(inode)->ip_blkno); - if (S_ISDIR(inode->i_mode)) - dentry_unhash(dentry); - dquot_initialize(dir); BUG_ON(dentry->d_parent->d_inode != dir); @@ -1066,9 +1063,6 @@ static int ocfs2_rename(struct inode *old_dir, struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; struct ocfs2_dir_lookup_result target_insert = { NULL, }; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - /* At some point it might be nice to break this function up a * bit. */ -- cgit v0.10.2 From b6ff24a333267a6810e28ee5b9fc539d149c52f0 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 24 May 2011 13:06:22 -0700 Subject: cifs: remove unnecessary dentry_unhash on rmdir/rename_dir Cifs has no problems with lingering references to unlinked directory inodes. CC: Steve French CC: linux-cifs@vger.kernel.org Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 18546b7..8852470 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1461,8 +1461,6 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) cFYI(1, "cifs_rmdir, inode = 0x%p", inode); - dentry_unhash(direntry); - xid = GetXid(); full_path = build_path_from_dentry(direntry); @@ -1571,9 +1569,6 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, FILE_UNIX_BASIC_INFO *info_buf_target; int xid, rc, tmprc; - if (target_dentry->d_inode && S_ISDIR(target_dentry->d_inode->i_mode)) - dentry_unhash(target_dentry); - cifs_sb = CIFS_SB(source_dir->i_sb); tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) -- cgit v0.10.2