From 287548e46aa752ce9bb87fcff46f8aa794cc5037 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 27 May 2011 06:50:06 -0400 Subject: split __follow_mount_rcu() into normal and .. cases Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 2358b32..da9c265 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -919,12 +919,11 @@ static inline bool managed_dentry_might_block(struct dentry *dentry) } /* - * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we - * meet a managed dentry and we're not walking to "..". True is returned to - * continue, false to abort. + * Try to skip to top of mountpoint pile in rcuwalk mode. Fail if + * we meet a managed dentry that would need blocking. */ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, - struct inode **inode, bool reverse_transit) + struct inode **inode) { for (;;) { struct vfsmount *mounted; @@ -933,8 +932,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, * that wants to block transit. */ *inode = path->dentry->d_inode; - if (!reverse_transit && - unlikely(managed_dentry_might_block(path->dentry))) + if (unlikely(managed_dentry_might_block(path->dentry))) return false; if (!d_mountpoint(path->dentry)) @@ -949,10 +947,29 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, } if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) - return reverse_transit; + return false; return true; } +static void follow_mount_rcu(struct nameidata *nd, struct path *path, + struct inode **inode) +{ + for (;;) { + struct vfsmount *mounted; + *inode = path->dentry->d_inode; + + if (!d_mountpoint(path->dentry)) + break; + + mounted = __lookup_mnt(path->mnt, path->dentry, 1); + if (!mounted) + break; + path->mnt = mounted; + path->dentry = mounted->mnt_root; + nd->seq = read_seqcount_begin(&path->dentry->d_seq); + } +} + static int follow_dotdot_rcu(struct nameidata *nd) { struct inode *inode = nd->inode; @@ -982,7 +999,7 @@ static int follow_dotdot_rcu(struct nameidata *nd) nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); inode = nd->path.dentry->d_inode; } - __follow_mount_rcu(nd, &nd->path, &inode, true); + follow_mount_rcu(nd, &nd->path, &inode); nd->inode = inode; return 0; @@ -1157,7 +1174,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, } path->mnt = mnt; path->dentry = dentry; - if (likely(__follow_mount_rcu(nd, path, inode, false))) + if (likely(__follow_mount_rcu(nd, path, inode))) return 0; unlazy: if (unlazy_walk(nd, dentry)) -- cgit v0.10.2 From dea3937619cb67d2ad08e2d29ae923875b1eeee9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 27 May 2011 06:53:39 -0400 Subject: Trim excessive arguments of follow_mount_rcu() ... and kill a useless local variable in follow_dotdot_rcu(), while we are at it - follow_mount_rcu(nd, path, inode) *always* assigned value to *inode, and always it had been path->dentry->d_inode (aka nd->path.dentry->d_inode, since it always got &nd->path as the second argument). Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index da9c265..9880814 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -951,29 +951,21 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, return true; } -static void follow_mount_rcu(struct nameidata *nd, struct path *path, - struct inode **inode) +static void follow_mount_rcu(struct nameidata *nd) { - for (;;) { + while (d_mountpoint(nd->path.dentry)) { struct vfsmount *mounted; - *inode = path->dentry->d_inode; - - if (!d_mountpoint(path->dentry)) - break; - - mounted = __lookup_mnt(path->mnt, path->dentry, 1); + mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1); if (!mounted) break; - path->mnt = mounted; - path->dentry = mounted->mnt_root; - nd->seq = read_seqcount_begin(&path->dentry->d_seq); + nd->path.mnt = mounted; + nd->path.dentry = mounted->mnt_root; + nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); } } static int follow_dotdot_rcu(struct nameidata *nd) { - struct inode *inode = nd->inode; - set_root_rcu(nd); while (1) { @@ -989,7 +981,6 @@ static int follow_dotdot_rcu(struct nameidata *nd) seq = read_seqcount_begin(&parent->d_seq); if (read_seqcount_retry(&old->d_seq, nd->seq)) goto failed; - inode = parent->d_inode; nd->path.dentry = parent; nd->seq = seq; break; @@ -997,10 +988,9 @@ static int follow_dotdot_rcu(struct nameidata *nd) if (!follow_up_rcu(&nd->path)) break; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); - inode = nd->path.dentry->d_inode; } - follow_mount_rcu(nd, &nd->path, &inode); - nd->inode = inode; + follow_mount_rcu(nd); + nd->inode = nd->path.dentry->d_inode; return 0; failed: -- cgit v0.10.2 From d6e9bd256c88ce5f4b668249e363a74f51393daa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 27 May 2011 07:03:15 -0400 Subject: Lift the check for automount points into do_lookup() Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 9880814..1ab641f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -945,9 +945,6 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, path->dentry = mounted->mnt_root; nd->seq = read_seqcount_begin(&path->dentry->d_seq); } - - if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) - return false; return true; } @@ -1164,8 +1161,11 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, } path->mnt = mnt; path->dentry = dentry; - if (likely(__follow_mount_rcu(nd, path, inode))) - return 0; + if (unlikely(!__follow_mount_rcu(nd, path, inode))) + goto unlazy; + if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) + goto unlazy; + return 0; unlazy: if (unlazy_walk(nd, dentry)) return -ECHILD; -- cgit v0.10.2 From aa38572954ade525817fe88c54faebf85e5a61c0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 May 2011 06:53:02 -0400 Subject: fs: pass exact type of data dirties to ->dirty_inode Tell the filesystem if we just updated timestamp (I_DIRTY_SYNC) or anything else, so that the filesystem can track internally if it needs to push out a transaction for fdatasync or not. This is just the prototype change with no user for it yet. I plan to push large XFS changes for the next merge window, and getting this trivial infrastructure in this window would help a lot to avoid tree interdependencies. Also remove incorrect comments that ->dirty_inode can't block. That has been changed a long time ago, and many implementations rely on it. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 61b31ac..57d827d6 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -104,7 +104,7 @@ of the locking scheme for directory operations. prototypes: struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*dirty_inode) (struct inode *); + void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, struct writeback_control *wbc); int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); @@ -126,7 +126,7 @@ locking rules: s_umount alloc_inode: destroy_inode: -dirty_inode: (must not sleep) +dirty_inode: write_inode: drop_inode: !!!inode->i_lock!!! evict_inode: diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 21a7dc4..88b9f55 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -211,7 +211,7 @@ struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*dirty_inode) (struct inode *); + void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, int); void (*drop_inode) (struct inode *); void (*delete_inode) (struct inode *); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8f4b81d..d2177e7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2522,7 +2522,7 @@ int btrfs_readpage(struct file *file, struct page *page); void btrfs_evict_inode(struct inode *inode); void btrfs_put_inode(struct inode *inode); int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); -void btrfs_dirty_inode(struct inode *inode); +void btrfs_dirty_inode(struct inode *inode, int flags); struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); int btrfs_drop_inode(struct inode *inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7cd8ab0..ecff7d7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4396,7 +4396,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) * FIXME, needs more benchmarking...there are no reasons other than performance * to keep or drop this code. */ -void btrfs_dirty_inode(struct inode *inode) +void btrfs_dirty_inode(struct inode *inode, int flags) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 68b2e43..3451d23 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3392,7 +3392,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) * so would cause a commit on atime updates, which we don't bother doing. * We handle synchronous inodes at the highest possible level. */ -void ext3_dirty_inode(struct inode *inode) +void ext3_dirty_inode(struct inode *inode, int flags) { handle_t *current_handle = ext3_journal_current_handle(); handle_t *handle; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a74b89c..1921392 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1813,7 +1813,7 @@ extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, extern void ext4_evict_inode(struct inode *); extern void ext4_clear_inode(struct inode *); extern int ext4_sync_inode(handle_t *, struct inode *); -extern void ext4_dirty_inode(struct inode *); +extern void ext4_dirty_inode(struct inode *, int); extern int ext4_change_inode_journal_flag(struct inode *, int); extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); extern int ext4_can_truncate(struct inode *inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 50d0e9c..a5763e3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5733,7 +5733,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) * so would cause a commit on atime updates, which we don't bother doing. * We handle synchronous inodes at the highest possible level. */ -void ext4_dirty_inode(struct inode *inode) +void ext4_dirty_inode(struct inode *inode, int flags) { handle_t *handle; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 34591ee8..0f015a0 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1007,9 +1007,6 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode) * In short, make sure you hash any inodes _before_ you start marking * them dirty. * - * This function *must* be atomic for the I_DIRTY_PAGES case - - * set_page_dirty() is called under spinlock in several places. - * * Note that for blockdevs, inode->dirtied_when represents the dirtying time of * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of * the kernel-internal blockdev inode represents the dirtying time of the @@ -1028,7 +1025,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) */ if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { if (sb->s_op->dirty_inode) - sb->s_op->dirty_inode(inode); + sb->s_op->dirty_inode(inode, flags); } /* diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index e896e67..46ad619 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -357,7 +357,7 @@ error: return ERR_PTR(ret); } -void jffs2_dirty_inode(struct inode *inode) +void jffs2_dirty_inode(struct inode *inode, int flags) { struct iattr iattr; diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 00bae7c..65c6c43 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -172,7 +172,7 @@ int jffs2_setattr (struct dentry *, struct iattr *); int jffs2_do_setattr (struct inode *, struct iattr *); struct inode *jffs2_iget(struct super_block *, unsigned long); void jffs2_evict_inode (struct inode *); -void jffs2_dirty_inode(struct inode *inode); +void jffs2_dirty_inode(struct inode *inode, int flags); struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri); int jffs2_statfs (struct dentry *, struct kstatfs *); diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index eddbb37..1096559 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -173,7 +173,7 @@ void jfs_evict_inode(struct inode *inode) dquot_drop(inode); } -void jfs_dirty_inode(struct inode *inode) +void jfs_dirty_inode(struct inode *inode, int flags) { static int noisy = 5; diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 155e91e..ec2fb8b 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -28,7 +28,7 @@ extern struct inode *jfs_iget(struct super_block *, unsigned long); extern int jfs_commit_inode(struct inode *, int); extern int jfs_write_inode(struct inode *, struct writeback_control *); extern void jfs_evict_inode(struct inode *); -extern void jfs_dirty_inode(struct inode *); +extern void jfs_dirty_inode(struct inode *, int); extern void jfs_truncate(struct inode *); extern void jfs_truncate_nolock(struct inode *, loff_t); extern void jfs_free_zero_link(struct inode *); diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 587f184..b954878 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -917,7 +917,7 @@ int nilfs_mark_inode_dirty(struct inode *inode) * construction. This function can be called both as a single operation * and as a part of indivisible file operations. */ -void nilfs_dirty_inode(struct inode *inode) +void nilfs_dirty_inode(struct inode *inode, int flags) { struct nilfs_transaction_info ti; struct nilfs_mdt_info *mdi = NILFS_MDT(inode); diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index a9c6a53..f02b9ad 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -269,7 +269,7 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh); extern int nilfs_inode_dirty(struct inode *); int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty); extern int nilfs_mark_inode_dirty(struct inode *); -extern void nilfs_dirty_inode(struct inode *); +extern void nilfs_dirty_inode(struct inode *, int flags); int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b216ff6..aa91089 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -568,7 +568,7 @@ static void destroy_inodecache(void) } /* we don't mark inodes dirty, we just log them */ -static void reiserfs_dirty_inode(struct inode *inode) +static void reiserfs_dirty_inode(struct inode *inode, int flags) { struct reiserfs_transaction_handle th; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 6db0bdaa..1ab0d22 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -382,7 +382,7 @@ done: end_writeback(inode); } -static void ubifs_dirty_inode(struct inode *inode) +static void ubifs_dirty_inode(struct inode *inode, int flags) { struct ubifs_inode *ui = ubifs_inode(inode); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 98b9c91..1e3a7ce 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -925,7 +925,8 @@ xfs_fs_inode_init_once( */ STATIC void xfs_fs_dirty_inode( - struct inode *inode) + struct inode *inode, + int flags) { barrier(); XFS_I(inode)->i_update_core = 1; diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 85c1d30..5e06acf 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -909,7 +909,7 @@ extern int ext3_setattr (struct dentry *, struct iattr *); extern void ext3_evict_inode (struct inode *); extern int ext3_sync_inode (handle_t *, struct inode *); extern void ext3_discard_reservation (struct inode *); -extern void ext3_dirty_inode(struct inode *); +extern void ext3_dirty_inode(struct inode *, int); extern int ext3_change_inode_journal_flag(struct inode *, int); extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *); extern int ext3_can_truncate(struct inode *inode); diff --git a/include/linux/fs.h b/include/linux/fs.h index 2416093..573028d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1618,7 +1618,7 @@ struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*dirty_inode) (struct inode *); + void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, struct writeback_control *wbc); int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); -- cgit v0.10.2 From 55b23bde19c08f14127a27d461a4e079942c7258 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 27 May 2011 14:50:36 +0200 Subject: xattr: Fix error results for non-existent / invisible attributes Return -ENODATA when trying to read a user.* attribute which cannot exist: user space otherwise does not have a reasonable way to distinguish between non-existent and inaccessible attributes. Likewise, return -ENODATA when an unprivileged process tries to read a trusted.* attribute: to unprivileged processes, those attributes are invisible (listxattr() won't include them). Related to this bug report: https://bugzilla.redhat.com/660613 Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro diff --git a/fs/xattr.c b/fs/xattr.c index f1ef949..4be2e76 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -46,18 +46,22 @@ xattr_permission(struct inode *inode, const char *name, int mask) return 0; /* - * The trusted.* namespace can only be accessed by a privileged user. + * The trusted.* namespace can only be accessed by privileged users. */ - if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) - return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM); + if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) { + if (!capable(CAP_SYS_ADMIN)) + return (mask & MAY_WRITE) ? -EPERM : -ENODATA; + return 0; + } - /* In user.* namespace, only regular files and directories can have + /* + * In the user.* namespace, only regular files and directories can have * extended attributes. For sticky directories, only the owner and - * privileged user can write attributes. + * privileged users can write attributes. */ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) - return -EPERM; + return (mask & MAY_WRITE) ? -EPERM : -ENODATA; if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && (mask & MAY_WRITE) && !inode_owner_or_capable(inode)) return -EPERM; -- cgit v0.10.2 From c642808454ac81d6a6701da6022e93bbe47bb38b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 27 May 2011 14:52:09 +0200 Subject: vfs: Improve the bio_add_page() and bio_add_pc_page() descriptions The descriptions of bio_add_page() and bio_add_pc_page() are slightly inconsistent; improve them. Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro diff --git a/fs/bio.c b/fs/bio.c index 840a0d7..9bfade8 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -638,10 +638,11 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page * @offset: vec entry offset * * Attempt to add a page to the bio_vec maplist. This can fail for a - * number of reasons, such as the bio being full or target block - * device limitations. The target block device must allow bio's - * smaller than PAGE_SIZE, so it is always possible to add a single - * page to an empty bio. This should only be used by REQ_PC bios. + * number of reasons, such as the bio being full or target block device + * limitations. The target block device must allow bio's up to PAGE_SIZE, + * so it is always possible to add a single page to an empty bio. + * + * This should only be used by REQ_PC bios. */ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset) @@ -659,10 +660,9 @@ EXPORT_SYMBOL(bio_add_pc_page); * @offset: vec entry offset * * Attempt to add a page to the bio_vec maplist. This can fail for a - * number of reasons, such as the bio being full or target block - * device limitations. The target block device must allow bio's - * smaller than PAGE_SIZE, so it is always possible to add a single - * page to an empty bio. + * number of reasons, such as the bio being full or target block device + * limitations. The target block device must allow bio's up to PAGE_SIZE, + * so it is always possible to add a single page to an empty bio. */ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset) -- cgit v0.10.2 From 4b4563dc80594c6a2580aa52d9fcf0177a27074e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 May 2011 09:28:01 -0400 Subject: fs: cosmetic inode.c cleanups Move the lock order description after all the includes, remove several fairly outdated and/or incorrect comments, move Andrea's copyright/changelog to the top where it belongs, remove the pointless filename in the top of the file comment, and remove to useless macros. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro diff --git a/fs/inode.c b/fs/inode.c index 990d284..0f7e88a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1,9 +1,7 @@ /* - * linux/fs/inode.c - * * (C) 1997 Linus Torvalds + * (C) 1999 Andrea Arcangeli (dynamic inode allocation) */ - #include #include #include @@ -27,10 +25,11 @@ #include #include #include +#include /* for inode_has_buffers */ #include "internal.h" /* - * inode locking rules. + * Inode locking rules: * * inode->i_lock protects: * inode->i_state, inode->i_hash, __iget() @@ -60,54 +59,11 @@ * inode_hash_lock */ -/* - * This is needed for the following functions: - * - inode_has_buffers - * - invalidate_bdev - * - * FIXME: remove all knowledge of the buffer layer from this file - */ -#include - -/* - * New inode.c implementation. - * - * This implementation has the basic premise of trying - * to be extremely low-overhead and SMP-safe, yet be - * simple enough to be "obviously correct". - * - * Famous last words. - */ - -/* inode dynamic allocation 1999, Andrea Arcangeli */ - -/* #define INODE_PARANOIA 1 */ -/* #define INODE_DEBUG 1 */ - -/* - * Inode lookup is no longer as critical as it used to be: - * most of the lookups are going to be through the dcache. - */ -#define I_HASHBITS i_hash_shift -#define I_HASHMASK i_hash_mask - static unsigned int i_hash_mask __read_mostly; static unsigned int i_hash_shift __read_mostly; static struct hlist_head *inode_hashtable __read_mostly; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); -/* - * Each inode can be on two separate lists. One is - * the hash list of the inode, used for lookups. The - * other linked list is the "type" list: - * "in_use" - valid inode, i_count > 0, i_nlink > 0 - * "dirty" - as "in_use" but also dirty - * "unused" - valid inode, i_count = 0 - * - * A "dirty" list is maintained for each super block, - * allowing for low-overhead inode sync() operations. - */ - static LIST_HEAD(inode_lru); static DEFINE_SPINLOCK(inode_lru_lock); @@ -424,8 +380,8 @@ static unsigned long hash(struct super_block *sb, unsigned long hashval) tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / L1_CACHE_BYTES; - tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); - return tmp & I_HASHMASK; + tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift); + return tmp & i_hash_mask; } /** -- cgit v0.10.2 From 76f0b8d2d2c8d417623142069cdfde1cf1e108d5 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:41:54 -0700 Subject: bfs: remove unnecessary dentry_unhash on dir rename Bfs does not have problems with references to unlinked directories. CC: tigran@aivazian.fsnet.co.uk Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index c7d1d06..b14cebf 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -224,9 +224,6 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct bfs_sb_info *info; int error = -ENOENT; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - old_bh = new_bh = NULL; old_inode = old_dentry->d_inode; if (S_ISDIR(old_inode->i_mode)) -- cgit v0.10.2 From 873ae4d5a8b282c6e2bbc09c6d59eeb1bec97ef7 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:41:55 -0700 Subject: sysv: remove unnecessary dentry_unhash from rmdir, dir rename sysv does not have problems with references to unlinked directories. CC: Christoph Hellwig Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index e2cc675..e474fbc 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -196,8 +196,6 @@ static int sysv_rmdir(struct inode * dir, struct dentry * dentry) struct inode *inode = dentry->d_inode; int err = -ENOTEMPTY; - dentry_unhash(dentry); - if (sysv_empty_dir(inode)) { err = sysv_unlink(dir, dentry); if (!err) { @@ -224,9 +222,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry, struct sysv_dir_entry * old_de; int err = -ENOENT; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - old_de = sysv_find_entry(old_dentry, &old_page); if (!old_de) goto out; -- cgit v0.10.2 From cf0f0536fa65ca1353476c49feed34891f3f7134 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:41:56 -0700 Subject: jffs2: remove unnecessary dentry_unhash from rmdir, dir rename jffs2 does not have problems with references to unlinked directories. CC: David Woodhouse CC: linux-mtd@lists.infradead.org Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 05f7332..82faddd 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -609,8 +609,6 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) int ret; uint32_t now = get_seconds(); - dentry_unhash(dentry); - for (fd = f->dents ; fd; fd = fd->next) { if (fd->ino) return -ENOTEMPTY; @@ -786,9 +784,6 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, uint8_t type; uint32_t now; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - /* The VFS will check for us and prevent trying to rename a * file over a directory and vice versa, but if it's a directory, * the VFS can't check whether the victim is empty. The filesystem -- cgit v0.10.2 From 44a8e6364e48ab93a1d86385b5fc9efe81395fa9 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:41:57 -0700 Subject: jfs: remove unnecessary dentry_unhash from rmdir, dir rename jfs does not have problems with references to unlinked directories. CC: Dave Kleikamp CC: jfs-discussion@lists.sourceforge.net Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 865df16..eaaf2b5 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -360,8 +360,6 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name); - dentry_unhash(dentry); - /* Init inode for quota operations. */ dquot_initialize(dip); dquot_initialize(ip); @@ -1097,9 +1095,6 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, jfs_info("jfs_rename: %s %s", old_dentry->d_name.name, new_dentry->d_name.name); - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - dquot_initialize(old_dir); dquot_initialize(new_dir); -- cgit v0.10.2 From 64370e2f9df572977757d70b1ad293b743dd9e20 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:41:58 -0700 Subject: logfs: remove unnecessary dentry_unhash from rmdir, dir rename logfs does not have problems with references to unlinked directories. CC: Joern Engel CC: logfs@logfs.org Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index f34c9cd..9ed89d1 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -273,8 +273,6 @@ static int logfs_rmdir(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; - dentry_unhash(dentry); - if (!logfs_empty_dir(inode)) return -ENOTEMPTY; @@ -624,9 +622,6 @@ static int logfs_rename_cross(struct inode *old_dir, struct dentry *old_dentry, loff_t pos; int err; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - /* 1. locate source dd */ err = logfs_get_dd(old_dir, old_dentry, &dd, &pos); if (err) -- cgit v0.10.2 From dfb55de89879a1c32a70d0a510b3701ed9a6b855 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:41:59 -0700 Subject: nilfs2: remove unnecessary dentry_unhash from rmdir, dir rename nilfs2 does not have problems with references to unlinked directories. CC: KONISHI Ryusuke CC: linux-nilfs@vger.kernel.org Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 1102a5f..546849b 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -334,8 +334,6 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) struct nilfs_transaction_info ti; int err; - dentry_unhash(dentry); - err = nilfs_transaction_begin(dir->i_sb, &ti, 0); if (err) return err; @@ -371,9 +369,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct nilfs_transaction_info ti; int err; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1); if (unlikely(err)) return err; -- cgit v0.10.2 From 0e54ec1c3a002a9d5e57b5ac73a934cc15a0fe06 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:00 -0700 Subject: ubifs: remove unnecessary dentry_unhash from rmdir, dir rename ubifs does not have problems with references to unlinked directories. CC: Artem Bityutskiy CC: Adrian Hunter CC: linux-mtd@lists.infradead.org Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index c2b8094..ef5abd3 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -656,8 +656,6 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) struct ubifs_inode *dir_ui = ubifs_inode(dir); struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; - dentry_unhash(dentry); - /* * Budget request settings: deletion direntry, deletion inode and * changing the parent inode. If budgeting fails, go ahead anyway @@ -978,9 +976,6 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; struct timespec time; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - /* * Budget request settings: deletion direntry, new direntry, removing * the old inode, and changing old and new parent directory inodes. -- cgit v0.10.2 From 87161faae26503a8ebe1be5ba72073ae860dbfc7 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:01 -0700 Subject: ufs: remove unnecessary dentry_unhash from rmdir, dir rename ufs does not have problems with references to unlinked directories. CC: Evgeniy Dushistov Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 953ebdf..29309e2 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -258,8 +258,6 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry) struct inode * inode = dentry->d_inode; int err= -ENOTEMPTY; - dentry_unhash(dentry); - lock_ufs(dir->i_sb); if (ufs_empty_dir (inode)) { err = ufs_unlink(dir, dentry); @@ -284,9 +282,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, struct ufs_dir_entry *old_de; int err = -ENOENT; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_de) goto out; -- cgit v0.10.2 From cc350c2764a657ee012efd5bd260a6cd5be2f877 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:02 -0700 Subject: reiserfs: remove unnecessary dentry_unhash from rmdir, dir rename Reiserfs does not have problems with references to unlinked directories. CC: reiserfs-devel@vger.kernel.org Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 76c8164..1186626 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -831,8 +831,6 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) INITIALIZE_PATH(path); struct reiserfs_dir_entry de; - dentry_unhash(dentry); - /* we will be doing 2 balancings and update 2 stat data, we change quotas * of the owner of the directory and of the owner of the parent directory. * The quota structure is possibly deleted only on last iput => outside @@ -1227,9 +1225,6 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, unsigned long savelink = 1; struct timespec ctime; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - /* three balancings: (1) old name removal, (2) new name insertion and (3) maybe "save" link insertion stat data updates: (1) old directory, diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 50f1abc..e8a62f4 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -98,7 +98,6 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry) reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex, I_MUTEX_CHILD, dir->i_sb); - dentry_unhash(dentry); error = dir->i_op->rmdir(dir, dentry); if (!error) dentry->d_inode->i_flags |= S_DEAD; -- cgit v0.10.2 From 7020739df2fa0e2126fc9739987e016860f14323 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:03 -0700 Subject: udf: remove unnecessary dentry_unhash from rmdir, dir rename udf does not have problems with references to unlinked directories. CC: Jan Kara Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 4d76594..f1dce84 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -783,8 +783,6 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) struct fileIdentDesc *fi, cfi; struct kernel_lb_addr tloc; - dentry_unhash(dentry); - retval = -ENOENT; fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); if (!fi) @@ -1083,9 +1081,6 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, struct kernel_lb_addr tloc; struct udf_inode_info *old_iinfo = UDF_I(old_inode); - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); if (ofi) { if (ofibh.sbh != ofibh.ebh) -- cgit v0.10.2 From 8aaa0f5431d8d1181b3d1a1bcd8f3330c0ce275f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:04 -0700 Subject: omfs: remove unnecessary dentry_unhash on rmdir, dir rneame omfs does not have problems with references to unlinked directories. CC: Bob Copeland CC: linux-karma-devel@lists.sourceforge.net Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index c368360c..3b8d397 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -241,11 +241,9 @@ static int omfs_remove(struct inode *dir, struct dentry *dentry) int ret; - if (S_ISDIR(inode->i_mode)) { - dentry_unhash(dentry); - if (!omfs_dir_is_empty(inode)) - return -ENOTEMPTY; - } + if (S_ISDIR(inode->i_mode) && + !omfs_dir_is_empty(inode)) + return -ENOTEMPTY; ret = omfs_delete_entry(dentry); if (ret) @@ -382,9 +380,6 @@ static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry, int err; if (new_inode) { - if (S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - /* overwriting existing file/dir */ err = omfs_remove(new_dir, new_dentry); if (err) -- cgit v0.10.2 From 4e82d61b6ac4966b3b61c2d97ddf04928f037be1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:05 -0700 Subject: hfs: remove unnecessary dentry_unhash on rmdir, dir rename hfs does not have problems with references to unlinked directories. Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 1cb70cd..b4d70b1 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -253,9 +253,6 @@ static int hfs_remove(struct inode *dir, struct dentry *dentry) struct inode *inode = dentry->d_inode; int res; - if (S_ISDIR(inode->i_mode)) - dentry_unhash(dentry); - if (S_ISDIR(inode->i_mode) && inode->i_size != 2) return -ENOTEMPTY; res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name); @@ -286,9 +283,6 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry, /* Unlink destination if it already exists */ if (new_dentry->d_inode) { - if (S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - res = hfs_remove(new_dir, new_dentry); if (res) return res; -- cgit v0.10.2 From e3911785b8ae6897b3dae2af4fa296aa5a0f2c56 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:06 -0700 Subject: hfsplus: remove unnecessary dentry_unhash on rmdir, dir rename hfsplus does not have problems with references to unlinked directories. Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index b288350..4df5059 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -370,8 +370,6 @@ static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry) struct inode *inode = dentry->d_inode; int res; - dentry_unhash(dentry); - if (inode->i_size != 2) return -ENOTEMPTY; @@ -469,12 +467,10 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, /* Unlink destination if it already exists */ if (new_dentry->d_inode) { - if (S_ISDIR(new_dentry->d_inode->i_mode)) { - dentry_unhash(new_dentry); + if (S_ISDIR(new_dentry->d_inode->i_mode)) res = hfsplus_rmdir(new_dir, new_dentry); - } else { + else res = hfsplus_unlink(new_dir, new_dentry); - } if (res) return res; } -- cgit v0.10.2 From e41a59e0550b7bb40fe8c3438d690712e9fd511c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:07 -0700 Subject: hostfs: remove unnecessary dentry_unhash on rmdir, dir rename hostfs does not have problems with references to unlinked directories. CC: Jeff Dike CC: Richard Weinberger CC: user-mode-linux-devel@lists.sourceforge.net Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index e6816b9..2638c834e 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -683,8 +683,6 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry) char *file; int err; - dentry_unhash(dentry); - if ((file = dentry_name(dentry)) == NULL) return -ENOMEM; err = do_rmdir(file); @@ -738,9 +736,6 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, char *from_name, *to_name; int err; - if (to->d_inode && S_ISDIR(to->d_inode->i_mode)) - dentry_unhash(to); - if ((from_name = dentry_name(from)) == NULL) return -ENOMEM; if ((to_name = dentry_name(to)) == NULL) { -- cgit v0.10.2 From 55e5b7e022eaaa805a44e3b6ecd5c8638d862050 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:08 -0700 Subject: ecryptfs: remove unnecessary dentry_unhash on rmdir, dir rename ecryptfs does not have problems with references to unlinked directories. CC: Tyler Hicks CC: Dustin Kirkland CC: ecryptfs-devel@lists.launchpad.net Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 227b409..4d4cc6a 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -521,8 +521,6 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) struct dentry *lower_dir_dentry; int rc; - dentry_unhash(dentry); - lower_dentry = ecryptfs_dentry_to_lower(dentry); dget(dentry); lower_dir_dentry = lock_parent(lower_dentry); @@ -573,9 +571,6 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *lower_new_dir_dentry; struct dentry *trap = NULL; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); dget(lower_old_dentry); -- cgit v0.10.2 From 7ce605d93b775e8a960b0be244f7be565e73b3c1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:09 -0700 Subject: ncpfs: document dentry_unhash usage ncpfs returns EBUSY if there are any references to the directory. The dentry_unhash call only unhashes the dentry if there are no references. CC: Petr Vandrovec CC: linux-kernel@vger.kernel.org Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index e3e646b..81c287d 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -1033,8 +1033,11 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry) DPRINTK("ncp_rmdir: removing %s/%s\n", dentry->d_parent->d_name.name, dentry->d_name.name); + /* + * fail with EBUSY if there are still references to this + * directory. + */ dentry_unhash(dentry); - error = -EBUSY; if (!d_unhashed(dentry)) goto out; -- cgit v0.10.2 From 76cc071a06afb4d2dd17bb1b855a233a419e7e02 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:10 -0700 Subject: ncpfs: fix rename over directory with dangling references ncpfs does not handle references to unlinked directories (or so it would seem given the ncp_rmdir check). Since it is also possible to rename over an empty directory, perform the same check here. CC: Petr Vandrovec CC: linux-kernel@vger.kernel.org Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 81c287d..9c51f62 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -1144,8 +1144,16 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, old_dentry->d_parent->d_name.name, old_dentry->d_name.name, new_dentry->d_parent->d_name.name, new_dentry->d_name.name); - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) { + /* + * fail with EBUSY if there are still references to this + * directory. + */ dentry_unhash(new_dentry); + error = -EBUSY; + if (!d_unhashed(new_dentry)) + goto out; + } ncp_age_dentry(server, old_dentry); ncp_age_dentry(server, new_dentry); -- cgit v0.10.2 From 86905d6d96f138a99326016e4f0ca933200e0729 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:11 -0700 Subject: 9p: remove unnecessary dentry_unhash on rmdir, dir rename 9p has no problems with references to unlinked directories. CC: Eric Van Hensbergen CC: Ron Minnich CC: Latchesar Ionkov CC: v9fs-developer@lists.sourceforge.net Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 8d7f3e6..7f6c677 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -814,7 +814,6 @@ int v9fs_vfs_unlink(struct inode *i, struct dentry *d) int v9fs_vfs_rmdir(struct inode *i, struct dentry *d) { - dentry_unhash(d); return v9fs_remove(i, d, 1); } @@ -840,9 +839,6 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct p9_fid *newdirfid; struct p9_wstat wstat; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - P9_DPRINTK(P9_DEBUG_VFS, "\n"); retval = 0; old_inode = old_dentry->d_inode; -- cgit v0.10.2 From 94f9901b6c05d5e1e38d887179b8307efbc8e10c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:12 -0700 Subject: affs: remove unnecessary dentry_unhash on rmdir, dir rename affs has no problems with references to unlinked directories. Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 03330e2..e3e9efc 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -320,8 +320,6 @@ affs_rmdir(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_ino, (int)dentry->d_name.len, dentry->d_name.name); - dentry_unhash(dentry); - return affs_remove_header(dentry); } @@ -419,9 +417,6 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry, struct buffer_head *bh = NULL; int retval; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - pr_debug("AFFS: rename(old=%u,\"%*s\" to new=%u,\"%*s\")\n", (u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name, (u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name); -- cgit v0.10.2 From 705da2de95ce79199a52f21d82dd9f36b89afd12 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:13 -0700 Subject: afs: remove unnecessary dentry_unhash on rmdir, dir rename afs has no problems with references to unlinked directories. CC: David Howells CC: linux-afs@lists.infradead.org Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 2c4e051..20c106f 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -845,8 +845,6 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) _enter("{%x:%u},{%s}", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); - dentry_unhash(dentry); - ret = -ENAMETOOLONG; if (dentry->d_name.len >= AFSNAMEMAX) goto error; @@ -1148,9 +1146,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, struct key *key; int ret; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - vnode = AFS_FS_I(old_dentry->d_inode); orig_dvnode = AFS_FS_I(old_dir); new_dvnode = AFS_FS_I(new_dir); -- cgit v0.10.2 From 42b850b2806f8f55f5c3a0e3c78af738d5054fdc Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:14 -0700 Subject: coda: remove unnecessary dentry_unhash on rmdir, dir rename Coda has no problems with references to unlinked directories. CC: Jan Harkes CC: coda@cs.cmu.edu CC: codalist@coda.cs.cmu.edu Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/coda/dir.c b/fs/coda/dir.c index a46126f..2b8dae4 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -336,8 +336,6 @@ static int coda_rmdir(struct inode *dir, struct dentry *de) int len = de->d_name.len; int error; - dentry_unhash(de); - error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len); if (!error) { /* VFS may delete the child */ @@ -361,9 +359,6 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, int new_length = new_dentry->d_name.len; int error; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - error = venus_rename(old_dir->i_sb, coda_i2f(old_dir), coda_i2f(new_dir), old_length, new_length, (const char *) old_name, (const char *)new_name); -- cgit v0.10.2 From 526e7ce5528c8addcf154acec41b615f71dce7e1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:15 -0700 Subject: fuse: remove unnecessary dentry_unhash on rmdir, dir rename Fuse has no problems with references to unlinked directories. CC: Miklos Szeredi CC: fuse-devel@lists.sourceforge.net Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 0d0e3fa..d5016071 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -667,8 +667,6 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) if (IS_ERR(req)) return PTR_ERR(req); - dentry_unhash(entry); - req->in.h.opcode = FUSE_RMDIR; req->in.h.nodeid = get_node_id(dir); req->in.numargs = 1; @@ -694,9 +692,6 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, struct fuse_conn *fc = get_fuse_conn(olddir); struct fuse_req *req = fuse_get_req(fc); - if (newent->d_inode && S_ISDIR(newent->d_inode->i_mode)) - dentry_unhash(newent); - if (IS_ERR(req)) return PTR_ERR(req); -- cgit v0.10.2 From b80d2c228d8206cadc32226750dbbf3b707bdd19 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:16 -0700 Subject: minix: remove unnecessary dentry_unhash on rmdir, dir rename Minix has no issues with references to unlinked directories. Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/minix/namei.c b/fs/minix/namei.c index f60aed8..6e6777f 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -168,8 +168,6 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry) struct inode * inode = dentry->d_inode; int err = -ENOTEMPTY; - dentry_unhash(dentry); - if (minix_empty_dir(inode)) { err = minix_unlink(dir, dentry); if (!err) { @@ -192,9 +190,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry, struct minix_dir_entry * old_de; int err = -ENOENT; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - old_de = minix_find_entry(old_dentry, &old_page); if (!old_de) goto out; -- cgit v0.10.2 From 45adfef7d023004ff95bf63b5f2f0e2d88afac3f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:17 -0700 Subject: hpfs: remove unnecessary dentry_unhash on rmdir, dir rename Hpfs has no problems with references to unlinked directories. We leave one dentry_unhash call in place, in hpfs_unlink's strange path where it tries to truncate a file because the disk is full. I'm not sure what the full story is there. CC: Mikulas Patocka Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index ff0ce21..acf95da 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -439,8 +439,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry) int err; int r; - dentry_unhash(dentry); - hpfs_adjust_length(name, &len); hpfs_lock(dir->i_sb); err = -ENOENT; @@ -535,9 +533,6 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct fnode *fnode; int err; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - if ((err = hpfs_chk_name(new_name, &new_len))) return err; err = 0; hpfs_adjust_length(old_name, &old_len); -- cgit v0.10.2 From f4ff0e25c5093dd89e9cac4a8f71a57587ada787 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:18 -0700 Subject: fat: remove unnecessary dentry_unhash on rmdir, dir rename fat does not have problems with references to unlinked directories. CC: OGAWA Hirofumi Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index be15437..3b222da 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -326,8 +326,6 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) struct fat_slot_info sinfo; int err; - dentry_unhash(dentry); - lock_super(sb); /* * Check whether the directory is not in use, then check @@ -459,9 +457,6 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, old_inode = old_dentry->d_inode; new_inode = new_dentry->d_inode; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - err = fat_scan(old_dir, old_name, &old_sinfo); if (err) { err = -EIO; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index c61a678..20b4ea5 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -824,8 +824,6 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) struct fat_slot_info sinfo; int err; - dentry_unhash(dentry); - lock_super(sb); err = fat_dir_empty(inode); @@ -933,9 +931,6 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, int err, is_dir, update_dotdot, corrupt = 0; struct super_block *sb = old_dir->i_sb; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; old_inode = old_dentry->d_inode; new_inode = new_dentry->d_inode; -- cgit v0.10.2 From 98702467f829177b3993f17da9fe5c202d160e5e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:19 -0700 Subject: configfs: remove unnecessary dentry_unhash on rmdir, dir rename configfs does not have problems with references to unlinked directories. CC: Joel Becker Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 9d17d35..9a37a9b 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1359,8 +1359,6 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) struct module *subsys_owner = NULL, *dead_item_owner = NULL; int ret; - dentry_unhash(dentry); - if (dentry->d_parent == configfs_sb->s_root) return -EPERM; -- cgit v0.10.2 From 3d08bcc887a1c8d12be8d81f747ffa2e8a44b67b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 27 May 2011 12:23:34 -0700 Subject: mm: Wait for writeback when grabbing pages to begin a write When grabbing a page for a buffered IO write, the mm should wait for writeback on the page to complete so that the page does not become writable during the IO operation. This change is needed to provide page stability during writes for all filesystems. Signed-off-by: Darrick J. Wong Signed-off-by: Al Viro diff --git a/mm/filemap.c b/mm/filemap.c index bcdc393..dac95a2 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2327,7 +2327,7 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping, repeat: page = find_lock_page(mapping, index); if (page) - return page; + goto found; page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask); if (!page) @@ -2340,6 +2340,8 @@ repeat: goto repeat; return NULL; } +found: + wait_on_page_writeback(page); return page; } EXPORT_SYMBOL(grab_cache_page_write_begin); -- cgit v0.10.2 From d76ee18a8551e33ad7dbd55cac38bc7b094f3abb Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 27 May 2011 12:23:41 -0700 Subject: fs: block_page_mkwrite should wait for writeback to finish For filesystems such as nilfs2 and xfs that use block_page_mkwrite, modify that function to wait for pending writeback before allowing the page to become writable. This is needed to stabilize pages during writeback for those two filesystems. Signed-off-by: Darrick J. Wong Signed-off-by: Al Viro diff --git a/fs/buffer.c b/fs/buffer.c index 698c6b2..49c9aad 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2382,6 +2382,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, ret = -EAGAIN; goto out_unlock; } + wait_on_page_writeback(page); return 0; out_unlock: unlock_page(page); -- cgit v0.10.2 From 69b4573296469fd3f70cf7044693074980517067 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 28 May 2011 08:25:51 -0700 Subject: Cache xattr security drop check for write v2 Some recent benchmarking on btrfs showed that a major scaling bottleneck on large systems on btrfs is currently the xattr lookup on every write. Why xattr lookup on every write I hear you ask? write wants to drop suid and security related xattrs that could set o capabilities for executables. To do that it currently looks up security.capability on EVERY write (even for non executables) to decide whether to drop it or not. In btrfs this causes an additional tree walk, hitting some per file system locks and quite bad scalability. In a simple read workload on a 8S system I saw over 90% CPU time in spinlocks related to that. Chris Mason tells me this is also a problem in ext4, where it hits the global mbcache lock. This patch adds a simple per inode to avoid this problem. We only do the lookup once per file and then if there is no xattr cache the decision. All xattr changes clear the flag. I also used the same flag to avoid the suid check, although that one is pretty cheap. A file system can also set this flag when it creates the inode, if it has a cheap way to do so. This is done for some common file systems in followon patches. With this patch a major part of the lock contention disappears for btrfs. Some testing on smaller systems didn't show significant performance changes, but at least it helps the larger systems and is generally more efficient. v2: Rename is_sgid. add file system helper. Cc: chris.mason@oracle.com Cc: josef@redhat.com Cc: viro@zeniv.linux.org.uk Cc: agruen@linbit.com Cc: Serge E. Hallyn Signed-off-by: Andi Kleen Signed-off-by: Al Viro diff --git a/fs/attr.c b/fs/attr.c index 91dbe2a..caf2aa5 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -175,6 +175,13 @@ int notify_change(struct dentry * dentry, struct iattr * attr) return -EPERM; } + if ((ia_valid & ATTR_MODE)) { + mode_t amode = attr->ia_mode; + /* Flag setting protected by i_mutex */ + if (is_sxid(amode)) + inode->i_flags &= ~S_NOSEC; + } + now = current_fs_time(inode->i_sb); attr->ia_ctime = now; diff --git a/fs/xattr.c b/fs/xattr.c index 4be2e76..f060663 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -91,7 +91,11 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, { struct inode *inode = dentry->d_inode; int error = -EOPNOTSUPP; + int issec = !strncmp(name, XATTR_SECURITY_PREFIX, + XATTR_SECURITY_PREFIX_LEN); + if (issec) + inode->i_flags &= ~S_NOSEC; if (inode->i_op->setxattr) { error = inode->i_op->setxattr(dentry, name, value, size, flags); if (!error) { @@ -99,8 +103,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, security_inode_post_setxattr(dentry, name, value, size, flags); } - } else if (!strncmp(name, XATTR_SECURITY_PREFIX, - XATTR_SECURITY_PREFIX_LEN)) { + } else if (issec) { const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; error = security_inode_setsecurity(inode, suffix, value, size, flags); diff --git a/include/linux/fs.h b/include/linux/fs.h index 573028d..c55d6b7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -237,6 +237,7 @@ struct inodes_stat_t { #define S_PRIVATE 512 /* Inode is fs-internal */ #define S_IMA 1024 /* Inode has an associated IMA struct */ #define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */ +#define S_NOSEC 4096 /* no suid or xattr security attributes */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -273,6 +274,7 @@ struct inodes_stat_t { #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) #define IS_IMA(inode) ((inode)->i_flags & S_IMA) #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) +#define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) /* the read-only stuff doesn't really belong here, but any other place is probably as bad and I don't want to create yet another include file. */ @@ -2582,5 +2584,16 @@ int __init get_filesystem_list(char *buf); #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ (flag & __FMODE_NONOTIFY))) +static inline int is_sxid(mode_t mode) +{ + return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); +} + +static inline void inode_has_no_xattr(struct inode *inode) +{ + if (!is_sxid(inode->i_mode)) + inode->i_flags |= S_NOSEC; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_FS_H */ diff --git a/mm/filemap.c b/mm/filemap.c index dac95a2..d7b1057 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1982,16 +1982,26 @@ static int __remove_suid(struct dentry *dentry, int kill) int file_remove_suid(struct file *file) { struct dentry *dentry = file->f_path.dentry; - int killsuid = should_remove_suid(dentry); - int killpriv = security_inode_need_killpriv(dentry); + struct inode *inode = dentry->d_inode; + int killsuid; + int killpriv; int error = 0; + /* Fast path for nothing security related */ + if (IS_NOSEC(inode)) + return 0; + + killsuid = should_remove_suid(dentry); + killpriv = security_inode_need_killpriv(dentry); + if (killpriv < 0) return killpriv; if (killpriv) error = security_inode_killpriv(dentry); if (!error && killsuid) error = __remove_suid(dentry, killsuid); + if (!error) + inode->i_flags |= S_NOSEC; return error; } -- cgit v0.10.2